Page MenuHomeFreeBSD

D4232.diff
No OneTemporary

D4232.diff

Index: etc/defaults/rc.conf
===================================================================
--- etc/defaults/rc.conf
+++ etc/defaults/rc.conf
@@ -682,6 +682,8 @@
rctl_enable="NO" # Load rctl(8) rules on boot
rctl_rules="/etc/rctl.conf" # rctl(8) ruleset. See rctl.conf(5).
+iovctl_files="" # Config files for iovctl(8)
+
##############################################################
### Jail Configuration (see rc.conf(5) manual page) ##########
##############################################################
Index: etc/rc.d/Makefile
===================================================================
--- etc/rc.d/Makefile
+++ etc/rc.d/Makefile
@@ -44,6 +44,7 @@
hostid_save \
hostname \
initrandom \
+ iovctl \
ip6addrctl \
ipfilter \
ipfs \
Index: etc/rc.d/iovctl
===================================================================
--- /dev/null
+++ etc/rc.d/iovctl
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# $FreeBSD: head/etc/rc.d/iovctl 284891 2015-06-27 18:01:50Z pkelsey $
+#
+
+# PROVIDE: iovctl
+# REQUIRE: FILESYSTEMS sysctl
+
+. /etc/rc.subr
+
+name="iovctl"
+command="/usr/sbin/iovctl"
+start_cmd="iovctl_start"
+stop_cmd="iovctl_stop"
+
+run_iovctl()
+{
+ local _f flag
+
+ flag=$1
+ for _f in ${iovctl_files} ; do
+ if [ -r ${_f} ]; then
+ ${command} ${flag} -f ${_f} > /dev/null
+ fi
+ done
+}
+
+iovctl_start()
+{
+ run_iovctl -C
+}
+
+iovctl_stop()
+{
+ run_iovctl -D
+}
+
+load_rc_config $name
+run_rc_command "$1"
Index: etc/rc.d/netif
===================================================================
--- etc/rc.d/netif
+++ etc/rc.d/netif
@@ -26,7 +26,7 @@
#
# PROVIDE: netif
-# REQUIRE: atm1 FILESYSTEMS serial sppp sysctl
+# REQUIRE: atm1 FILESYSTEMS iovctl serial sppp sysctl
# REQUIRE: ipfilter ipfs
# KEYWORD: nojailvnet
Index: share/man/man5/rc.conf.5
===================================================================
--- share/man/man5/rc.conf.5
+++ share/man/man5/rc.conf.5
@@ -4524,6 +4524,11 @@
.Xr rctl.conf 5
ruleset to load for
.Xr rctl 8 .
+.It Va iovctl_files
+.Pq Vt str
+A space-separated list of configuration files used by
+.Xr iovctl 8 .
+The default value is an empty string.
.El
.Sh FILES
.Bl -tag -width ".Pa /etc/defaults/rc.conf" -compact
@@ -4577,6 +4582,7 @@
.Xr hcsecd 8 ,
.Xr ifconfig 8 ,
.Xr inetd 8 ,
+.Xr iovctl 8 ,
.Xr ipf 8 ,
.Xr ipfw 8 ,
.Xr ipnat 8 ,
Index: share/man/man9/Makefile
===================================================================
--- share/man/man9/Makefile
+++ share/man/man9/Makefile
@@ -194,6 +194,10 @@
p_candebug.9 \
p_cansee.9 \
pci.9 \
+ PCI_IOV_ADD_VF.9 \
+ PCI_IOV_INIT.9 \
+ pci_iov_schema.9 \
+ PCI_IOV_UNINIT.9 \
pfil.9 \
pfind.9 \
pget.9 \
Index: share/man/man9/PCI_IOV_ADD_VF.9
===================================================================
--- /dev/null
+++ share/man/man9/PCI_IOV_ADD_VF.9
@@ -0,0 +1,112 @@
+.\"
+.\" Copyright (c) 2014 Sandvine Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: head/share/man/man9/PCI_IOV_ADD_VF.9 286663 2015-08-12 11:56:19Z brueffer $
+.\"
+.Dd May 28, 2015
+.Dt PCI_IOV_ADD_VF 9
+.Os
+.Sh NAME
+.Nm PCI_IOV_ADD_VF
+.Nd inform a PF driver that a VF is being created
+.Sh SYNOPSIS
+.In sys/bus.h
+.In machine/stdarg.h
+.In sys/nv.h
+.In dev/pci/pci_iov.h
+.Ft int
+.Fn PCI_IOV_ADD_VF "device_t dev" "uint16_t vfnum" "const nvlist_t *vf_config"
+.Sh DESCRIPTION
+The
+.Fn PCI_IOV_ADD_VF
+method is called by the PCI Single-Root I/O Virtualization
+.Pq SR-IOV
+infrastructure when it is initializing a new Virtual Function (VF) as a child
+of the given Physical Function (PF) device.
+This method will not be called until a successful call to
+.Xr PCI_IOV_INIT 9
+has been made.
+It is not guaranteed that this method will be called following a successful call
+to
+.Xr PCI_IOV_INIT 9 .
+If the infrastructure encounters a failure to allocate resources following the
+call to
+.Xr PCI_IOV_INIT 9 ,
+the VF creation will be aborted and
+.Xr PCI_IOV_UNINIT 9
+will be called immediately without any preceding calls to
+.Nm .
+.Pp
+The index of the VF being initialized is passed in the
+.Fa vfnum
+argument.
+VFs are always numbered sequentially starting at 0.
+.Pp
+If the driver requested device-specific configuration parameters via a VF schema
+in its call to
+.Xr pci_iov_attach 9 ,
+those parameters will be contained in the
+.Fa vf_config
+argument.
+All configuration parameters that were either set as required parameters or that
+had a default value set in the VF schema are guaranteed to be present in
+.Fa vf_config .
+Configuration parameters that were neither set as required nor were given a
+default value are optional and may or may not be present in
+.Fa vf_config .
+.Fa vf_config
+will not contain any configuration parameters that were not specified in the VF
+schema.
+All configuration parameters will have the correct type and will be in the range
+of valid values specified in the schema.
+.Pp
+Note that it is possible for the user to set different configuration values on
+different VF devices that are children of the same PF.
+The PF driver must not cache configuration parameters passed in previous calls
+to
+.Fn PCI_IOV_ADD_VF
+for other VFs and apply those parameters to the current VF.
+.Pp
+This function will not be called twice for the same
+.Fa vfnum
+on the same PF device without
+.Xr PCI_IOV_UNINIT 9
+and
+.Xr PCI_IOV_INIT 9
+first being called, in that order.
+.Sh RETURN VALUES
+This method returns 0 on success, otherwise an appropriate error is returned.
+If this method returns an error then the current VF device will be destroyed
+but the rest of the VF devices will be created and SR-IOV will be enabled on
+the PF.
+.Sh SEE ALSO
+.Xr nv 9 ,
+.Xr pci 9 ,
+.Xr PCI_IOV_INIT 9 ,
+.Xr pci_iov_schema 9 ,
+.Xr PCI_IOV_UNINIT 9
+.Sh AUTHORS
+This manual page was written by
+.An Ryan Stone Aq Mt rstone@FreeBSD.org .
Index: share/man/man9/PCI_IOV_INIT.9
===================================================================
--- /dev/null
+++ share/man/man9/PCI_IOV_INIT.9
@@ -0,0 +1,85 @@
+.\"
+.\" Copyright (c) 2014 Sandvine Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: head/share/man/man9/PCI_IOV_INIT.9 286663 2015-08-12 11:56:19Z brueffer $
+.\"
+.Dd May 28, 2015
+.Dt PCI_IOV_INIT 9
+.Os
+.Sh NAME
+.Nm PCI_IOV_INIT
+.Nd enable SR-IOV on a PF device
+.Sh SYNOPSIS
+.In sys/bus.h
+.In machine/stdarg.h
+.In sys/nv.h
+.In dev/pci/pci_iov.h
+.Ft int
+.Fn PCI_IOV_INIT "device_t dev" "uint16_t num_vfs" "const nvlist_t *pf_config"
+.Sh DESCRIPTION
+The
+.Fn PCI_IOV_INIT
+method is called by the PCI Single-Root I/O Virtualization (SR-IOV)
+infrastructure when the user requests that SR-IOV be enabled on a Physical
+Function (PF).
+The number of Virtual Functions (VFs) that will be created is passed to this
+method in the
+.Fa num_vfs
+argument.
+.Pp
+If the driver requested device-specific PF configuration parameters via a PF
+schema in its call to
+.Xr pci_iov_attach 9 ,
+those parameters will be available in the
+.Fa pf_config
+argument.
+All configuration parameters that were either set as required parameters or that
+had a default value set in the PF schema are guaranteed to be present in
+.Fa pf_config .
+Configuration parameters that were neither set as required nor were given a
+default value are optional and may or may not be present in
+.Fa pf_config .
+.Fa pf_config
+will not contain any configuration parameters that were not specified in the PF
+schema.
+All configuration parameters will have the correct type and are in the range of
+valid values specified in the schema.
+.Pp
+If this method returns successfully, then this method will not be called again
+on the same device until after a call to
+.Xr PCI_IOV_UNINIT 9 .
+.Sh RETURN VALUES
+Returns 0 on success, otherwise an appropriate error is returned.
+If this method returns an error then the SR-IOV configuration will be aborted
+and no VFs will be created.
+.Sh SEE ALSO
+.Xr nv 9 ,
+.Xr pci 9 ,
+.Xr PCI_IOV_ADD_VF 9 ,
+.Xr pci_iov_schema 9 ,
+.Xr PCI_IOV_UNINIT 9
+.Sh AUTHORS
+This manual page was written by
+.An Ryan Stone Aq Mt rstone@FreeBSD.org .
Index: share/man/man9/PCI_IOV_UNINIT.9
===================================================================
--- /dev/null
+++ share/man/man9/PCI_IOV_UNINIT.9
@@ -0,0 +1,63 @@
+.\"
+.\" Copyright (c) 2014 Sandvine Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: head/share/man/man9/PCI_IOV_UNINIT.9 283670 2015-05-28 22:01:50Z jhb $
+.\"
+.Dd May 28, 2015
+.Dt PCI_IOV_UNINIT 9
+.Os
+.Sh NAME
+.Nm PCI_IOV_UNINIT
+.Nd disable SR-IOV on a PF device
+.Sh SYNOPSIS
+.In sys/bus.h
+.In dev/pci/pci_iov.h
+.Ft void
+.Fn PCI_IOV_UNINIT "device_t dev"
+.Sh DESCRIPTION
+The
+.Fn PCI_IOV_UNINIT
+method is called by the PCI Single-Root I/O Virtualization (SR-IOV)
+infrastructure when the user requests that SR-IOV be disabled on a Physical
+Function (PF).
+When this method is called, the PF driver must release any SR-IOV-related
+resources that it has allocated and disable any device-specific SR-IOV
+configuration in the device.
+.Pp
+This method will only be called following a successful call to
+.Xr PCI_IOV_INIT 9 .
+It is not guaranteed that
+.Xr PCI_IOV_ADD_VF 9
+will have been called for any Virtual Function (VF) after the call to
+.Xr PCI_IOV_INIT 9
+and before the call to
+.Nm .
+.Sh SEE ALSO
+.Xr pci 9 ,
+.Xr PCI_IOV_ADD_VF 9 ,
+.Xr PCI_IOV_INIT 9
+.Sh AUTHORS
+This manual page was written by
+.An Ryan Stone Aq Mt rstone@FreeBSD.org .
Index: share/man/man9/pci_iov_schema.9
===================================================================
--- /dev/null
+++ share/man/man9/pci_iov_schema.9
@@ -0,0 +1,265 @@
+.\"
+.\" Copyright (c) 2014 Sandvine Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: head/share/man/man9/pci_iov_schema.9 285273 2015-07-08 16:16:44Z pkelsey $
+.\"
+.Dd July 8, 2015
+.Dt PCI_IOV_SCHEMA 9
+.Os
+.Sh NAME
+.Nm pci_iov_schema ,
+.Nm pci_iov_schema_alloc_node ,
+.Nm pci_iov_schema_add_bool ,
+.Nm pci_iov_schema_add_string ,
+.Nm pci_iov_schema_add_uint8 ,
+.Nm pci_iov_schema_add_uint16 ,
+.Nm pci_iov_schema_add_uint32 ,
+.Nm pci_iov_schema_add_uint64 ,
+.Nm pci_iov_schema_add_unicast_mac
+.Nd PCI SR-IOV config schema interface
+.Sh SYNOPSIS
+.In machine/stdarg.h
+.In sys/nv.h
+.In sys/iov_schema.h
+.Ft nvlist_t *
+.Fn pci_iov_schema_alloc_node "void"
+.Ft void
+.Fn pci_iov_schema_add_bool "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "int defaultVal"
+.Ft void
+.Fn pci_iov_schema_add_string "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "const char *defaultVal"
+.Ft void
+.Fn pci_iov_schema_add_uint8 "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "uint8_t defaultVal"
+.Ft void
+.Fn pci_iov_schema_add_uint16 "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "uint16_t defaultVal"
+.Ft void
+.Fn pci_iov_schema_add_uint32 "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "uint32_t defaultVal"
+.Ft void
+.Fn pci_iov_schema_add_uint64 "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "uint64_t defaultVal"
+.Ft void
+.Fn pci_iov_schema_add_unicast_mac "nvlist_t *schema" "const char *name" \
+"uint32_t flags" "const uint8_t *defaultVal"
+.Sh DESCRIPTION
+The PCI Single-Root I/O Virtualization
+.Pq SR-IOV
+configuration schema is a data
+structure that describes the device-specific configuration parameters that a PF
+driver will accept when SR-IOV is enabled on the PF device.
+Each PF driver defines two schema instances: the PF schema and the VF schema.
+The PF schema describes configuration that applies to the PF device as a whole.
+The VF schema describes configuration that applies to an individual VF device.
+Different VF devices may have different configuration applied to them, as long
+as the configuration for each VF conforms to the VF schema.
+.Pp
+A PF driver builds a configuration schema by first allocating a schema node and
+then adding configuration parameter specifications to the schema.
+The configuration parameter specification consists of a name and a value type.
+.Pp
+Configuration parameter names are case-insensitive.
+It is an error to specify two or more configuration parameters with the same
+name.
+It is also an error to specify a configuration parameter that uses the same
+name as a configuration parameter used by the SR-IOV infrastructure.
+See
+.Xr iovctl.conf 5
+for documentation of all configuration parameters used by the SR-IOV
+infrastructure.
+.Pp
+The parameter type constrains the possible values that the configuration
+parameter may take.
+.Pp
+A configuration parameter may be specified as a required parameter by setting
+the
+.Dv IOV_SCHEMA_REQUIRED
+flag in the
+.Fa flags
+argument.
+Required parameters must be specified by the user when SR-IOV is enabled.
+If the user does not specify a required parameter, the SR-IOV infrastructure
+will abort the request to enable SR-IOV and return an error to the user.
+.Pp
+Alternatively, a configuration parameter may be given a default value by
+setting the
+.Dv IOV_SCHEMA_HASDEFAULT
+flag in the
+.Fa flags
+argument.
+If a configuration parameter has a default value but the user has not specified
+a value for that parameter, then the SR-IOV infrastructure will apply
+.Fa defaultVal
+for that parameter in the configuration before passing it to the PF driver.
+It is an error for the value of the
+.Fa defaultVal
+parameter to not conform to the restrictions of the specified type.
+If this flag is not specified then the
+.Fa defaultVal
+argument is ignored.
+This flag is not compatible with the
+.Dv IOV_SCHEMA_REQUIRED
+flag; it is an error to specify both on the same parameter.
+.Pp
+The SR-IOV infrastructure guarantees that all configuration parameters that are
+either specified as required or given a default value will be present in the
+configuration passed to the PF driver.
+Configuration parameters that are neither specified as required nor given a
+default value are optional and may or may not be present in the configuration
+passed to the PF driver.
+.Pp
+It is highly recommended that a PF driver reserve the use of optional parameters
+for configuration that is truly optional.
+For example, a Network Interface PF device might have the option to encapsulate
+all traffic to and from a VF device in a vlan tag.
+The PF driver could expose that option as a "vlan" parameter accepting an
+integer argument specifying the vlan tag.
+In this case, it would be appropriate to set the "vlan" parameter as an optional
+parameter as it would be legitimate for a VF to be configured to have no vlan
+tagging enabled at all.
+.Pp
+Alternatively, if the PF device had a boolean option that controlled whether
+the VF was allowed to change its MAC address, it would not be appropriate to
+set this parameter as optional.
+The PF driver must either allow the MAC to change or not, so it would be more
+appropriate for the PF driver to document the default behaviour by specifying
+a default value in the schema
+.Po or potentially force the user to make the choice by setting the parameter
+to be required
+.Pc .
+.Pp
+Configuration parameters that have security implications must default to the
+most secure configuration possible.
+.Pp
+All device-specific configuration parameters must be documented in the manual
+page for the PF driver, or in a separate manual page that is cross-referenced
+from the main driver manual page.
+.Pp
+It is not necessary for a PF driver to check for failure from any of these
+functions.
+If an error occurs, it is flagged in the schema.
+The
+.Xr pci_iov_attach 9
+function checks for this error and will fail to initialize SR-IOV on the PF
+device if an error is set in the schema.
+If this occurs, it is recommended that the PF driver still succeed in attaching
+and run with SR-IOV disabled on the device.
+.Pp
+The
+.Fn pci_iov_schema_alloc_node
+function is used to allocate an empty configuration schema.
+It is not necessary to check for failure from this function.
+The SR-IOV infrastructure will gracefully handle failure to allocate a schema
+and will simply not enable SR-IOV on the PF device.
+.Pp
+The
+.Fn pci_iov_schema_add_bool
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a boolean type.
+Boolean values can only take the value true or false (1 or 0, respectively).
+.Pp
+The
+.Fn pci_iov_schema_add_string
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a string type.
+String values are standard C strings.
+.Pp
+The
+.Fn pci_iov_schema_add_uint8
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a
+.Vt uint8_t
+type.
+Values of type
+.Vt uint8_t
+are unsigned integers in the range 0 to 255, inclusive.
+.Pp
+The
+.Fn pci_iov_schema_add_uint16
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a
+.Vt uint16_t
+type.
+Values of type
+.Vt uint16_t
+are unsigned integers in the range 0 to 65535, inclusive.
+.Pp
+The
+.Fn pci_iov_schema_add_uint32
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a
+.Vt uint32_t
+type.
+Values of type
+.Vt uint32_t
+are unsigned integers in the range 0 to
+.Pq 2**32 - 1 ,
+inclusive.
+.Pp
+The
+.Fn pci_iov_schema_add_uint64
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a
+.Vt uint64_t
+type.
+Values of type
+.Vt uint64_t
+are unsigned integers in the range 0 to
+.Pq 2**64 - 1 ,
+inclusive.
+.Pp
+The
+.Fn pci_iov_schema_add_unicast_mac
+function is used to specify a configuration parameter in the given schema with
+the name
+.Fa name
+and having a unicast-mac type.
+Values of type unicast-mac are binary values exactly 6 bytes long.
+The MAC address is guaranteed to not be a multicast or broadcast address.
+.Sh RETURN VALUES
+The
+.Fn pci_iov_schema_alloc_node
+function returns a pointer to the allocated schema, or NULL if a failure occurs.
+.Sh SEE ALSO
+.Xr pci 9 ,
+.Xr PCI_IOV_ADD_VF 9 ,
+.Xr PCI_IOV_INIT 9
+.Sh AUTHORS
+This manual page was written by
+.An Ryan Stone Aq Mt rstone@FreeBSD.org .
Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC
+++ sys/amd64/conf/GENERIC
@@ -90,6 +90,8 @@
device acpi
options ACPI_DMAR
device pci
+options PCI_IOV # PCI SR-IOV support
+
# Floppy drives
device fdc
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -2003,6 +2003,9 @@
dev/pci/isa_pci.c optional pci isa
dev/pci/pci.c optional pci
dev/pci/pci_if.m standard
+dev/pci/pci_iov.c optional pci pci_iov
+dev/pci/pci_iov_if.m standard
+dev/pci/pci_iov_schema.c optional pci pci_iov
dev/pci/pci_pci.c optional pci
dev/pci/pci_subr.c optional pci
dev/pci/pci_user.c optional pci
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -165,6 +165,7 @@
NSWBUF_MIN opt_swap.h
MBUF_PACKET_ZONE_DISABLE opt_global.h
PANIC_REBOOT_WAIT_TIME opt_panic.h
+PCI_IOV opt_global.h
PPC_DEBUG opt_ppc.h
PPC_PROBE_CHIPSET opt_ppc.h
PPS_SYNC opt_ntp.h
Index: sys/dev/acpica/acpi_pci.c
===================================================================
--- sys/dev/acpica/acpi_pci.c
+++ sys/dev/acpica/acpi_pci.c
@@ -84,6 +84,11 @@
static void acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child);
static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child);
+#ifdef PCI_IOV
+static device_t acpi_pci_create_iov_child(device_t bus, device_t pf,
+ uint16_t rid, uint16_t vid, uint16_t did);
+#endif
+
static device_method_t acpi_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, acpi_pci_probe),
@@ -98,6 +103,9 @@
/* PCI interface */
DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method),
+#ifdef PCI_IOV
+ DEVMETHOD(pci_create_iov_child, acpi_pci_create_iov_child),
+#endif
DEVMETHOD_END
};
@@ -345,3 +353,23 @@
return (pci_get_dma_tag(bus, child));
}
#endif
+
+#ifdef PCI_IOV
+static device_t
+acpi_pci_create_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
+ uint16_t did)
+{
+ struct acpi_pci_devinfo *dinfo;
+ device_t vf;
+
+ vf = pci_add_iov_child(bus, pf, sizeof(struct acpi_pci_devinfo), rid,
+ vid, did);
+ if (vf == NULL)
+ return (NULL);
+
+ dinfo = device_get_ivars(vf);
+ dinfo->ap_handle = NULL;
+ return (vf);
+}
+#endif
+
Index: sys/dev/pci/pci.c
===================================================================
--- sys/dev/pci/pci.c
+++ sys/dev/pci/pci.c
@@ -77,7 +77,6 @@
static int pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t pci_mapbase(uint64_t mapreg);
static const char *pci_maptype(uint64_t mapreg);
-static int pci_mapsize(uint64_t testval);
static int pci_maprange(uint64_t mapreg);
static pci_addr_t pci_rombase(uint64_t mapreg);
static int pci_romsize(uint64_t testval);
@@ -126,6 +125,9 @@
static uint16_t pci_get_rid_method(device_t dev, device_t child);
+static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s,
+ int f, uint16_t vid, uint16_t did, size_t size);
+
static device_method_t pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, pci_probe),
@@ -185,6 +187,11 @@
DEVMETHOD(pci_msi_count, pci_msi_count_method),
DEVMETHOD(pci_msix_count, pci_msix_count_method),
DEVMETHOD(pci_get_rid, pci_get_rid_method),
+#ifdef PCI_IOV
+ DEVMETHOD(pci_iov_attach, pci_iov_attach_method),
+ DEVMETHOD(pci_iov_detach, pci_iov_detach_method),
+ DEVMETHOD(pci_create_iov_child, pci_create_iov_child_method),
+#endif
DEVMETHOD_END
};
@@ -493,7 +500,7 @@
/* return log2 of map size decoded for memory or port map */
-static int
+int
pci_mapsize(uint64_t testval)
{
int ln2size;
@@ -537,7 +544,7 @@
}
return (ln2size);
}
-
+
/* return log2 of address range supported by map register */
static int
@@ -606,73 +613,81 @@
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define REG(n, w) PCIB_READ_CONFIG(pcib, b, s, f, n, w)
- pcicfgregs *cfg = NULL;
- struct pci_devinfo *devlist_entry;
- struct devlist *devlist_head;
+ uint16_t vid, did;
- devlist_head = &pci_devq;
+ vid = REG(PCIR_VENDOR, 2);
+ did = REG(PCIR_DEVICE, 2);
+ if (vid != 0xffff)
+ return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size));
+
+ return (NULL);
+}
- devlist_entry = NULL;
+static struct pci_devinfo *
+pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid,
+ uint16_t did, size_t size)
+{
+ struct pci_devinfo *devlist_entry;
+ pcicfgregs *cfg;
- if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
- devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
- if (devlist_entry == NULL)
- return (NULL);
+ devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
+ cfg = &devlist_entry->cfg;
+
+ cfg->domain = d;
+ cfg->bus = b;
+ cfg->slot = s;
+ cfg->func = f;
+ cfg->vendor = vid;
+ cfg->device = did;
+ cfg->cmdreg = REG(PCIR_COMMAND, 2);
+ cfg->statreg = REG(PCIR_STATUS, 2);
+ cfg->baseclass = REG(PCIR_CLASS, 1);
+ cfg->subclass = REG(PCIR_SUBCLASS, 1);
+ cfg->progif = REG(PCIR_PROGIF, 1);
+ cfg->revid = REG(PCIR_REVID, 1);
+ cfg->hdrtype = REG(PCIR_HDRTYPE, 1);
+ cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1);
+ cfg->lattimer = REG(PCIR_LATTIMER, 1);
+ cfg->intpin = REG(PCIR_INTPIN, 1);
+ cfg->intline = REG(PCIR_INTLINE, 1);
+
+ cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0;
+ cfg->hdrtype &= ~PCIM_MFDEV;
+ STAILQ_INIT(&cfg->maps);
+
+ cfg->devinfo_size = size;
+ cfg->iov = NULL;
+
+ pci_fixancient(cfg);
+ pci_hdrtypedata(pcib, b, s, f, cfg);
+
+ if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
+ pci_read_cap(pcib, cfg);
+
+ STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links);
+
+ devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
+ devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
+ devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
+ devlist_entry->conf.pc_sel.pc_func = cfg->func;
+ devlist_entry->conf.pc_hdr = cfg->hdrtype;
+
+ devlist_entry->conf.pc_subvendor = cfg->subvendor;
+ devlist_entry->conf.pc_subdevice = cfg->subdevice;
+ devlist_entry->conf.pc_vendor = cfg->vendor;
+ devlist_entry->conf.pc_device = cfg->device;
+
+ devlist_entry->conf.pc_class = cfg->baseclass;
+ devlist_entry->conf.pc_subclass = cfg->subclass;
+ devlist_entry->conf.pc_progif = cfg->progif;
+ devlist_entry->conf.pc_revid = cfg->revid;
+
+ pci_numdevs++;
+ pci_generation++;
- cfg = &devlist_entry->cfg;
-
- cfg->domain = d;
- cfg->bus = b;
- cfg->slot = s;
- cfg->func = f;
- cfg->vendor = REG(PCIR_VENDOR, 2);
- cfg->device = REG(PCIR_DEVICE, 2);
- cfg->cmdreg = REG(PCIR_COMMAND, 2);
- cfg->statreg = REG(PCIR_STATUS, 2);
- cfg->baseclass = REG(PCIR_CLASS, 1);
- cfg->subclass = REG(PCIR_SUBCLASS, 1);
- cfg->progif = REG(PCIR_PROGIF, 1);
- cfg->revid = REG(PCIR_REVID, 1);
- cfg->hdrtype = REG(PCIR_HDRTYPE, 1);
- cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1);
- cfg->lattimer = REG(PCIR_LATTIMER, 1);
- cfg->intpin = REG(PCIR_INTPIN, 1);
- cfg->intline = REG(PCIR_INTLINE, 1);
-
- cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0;
- cfg->hdrtype &= ~PCIM_MFDEV;
- STAILQ_INIT(&cfg->maps);
-
- pci_fixancient(cfg);
- pci_hdrtypedata(pcib, b, s, f, cfg);
-
- if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
- pci_read_cap(pcib, cfg);
-
- STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
-
- devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
- devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
- devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
- devlist_entry->conf.pc_sel.pc_func = cfg->func;
- devlist_entry->conf.pc_hdr = cfg->hdrtype;
-
- devlist_entry->conf.pc_subvendor = cfg->subvendor;
- devlist_entry->conf.pc_subdevice = cfg->subdevice;
- devlist_entry->conf.pc_vendor = cfg->vendor;
- devlist_entry->conf.pc_device = cfg->device;
-
- devlist_entry->conf.pc_class = cfg->baseclass;
- devlist_entry->conf.pc_subclass = cfg->subclass;
- devlist_entry->conf.pc_progif = cfg->progif;
- devlist_entry->conf.pc_revid = cfg->revid;
-
- pci_numdevs++;
- pci_generation++;
- }
return (devlist_entry);
-#undef REG
}
+#undef REG
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
@@ -2673,8 +2688,9 @@
return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
}
-static void
-pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
+void
+pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp,
+ int *bar64)
{
struct pci_devinfo *dinfo;
pci_addr_t map, testval;
@@ -2694,6 +2710,8 @@
pci_write_config(dev, reg, map, 4);
*mapp = map;
*testvalp = testval;
+ if (bar64 != NULL)
+ *bar64 = 0;
return;
}
@@ -2735,6 +2753,8 @@
*mapp = map;
*testvalp = testval;
+ if (bar64 != NULL)
+ *bar64 = (ln2range == 64);
}
static void
@@ -2789,7 +2809,7 @@
return ((cmd & PCIM_CMD_PORTEN) != 0);
}
-static struct pci_map *
+struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
struct pci_devinfo *dinfo;
@@ -2860,7 +2880,7 @@
return (barlen);
}
- pci_read_bar(dev, reg, &map, &testval);
+ pci_read_bar(dev, reg, &map, &testval, NULL);
if (PCI_BAR_MEM(map)) {
type = SYS_RES_MEMORY;
if (map & PCIM_BAR_MEM_PREFETCH)
@@ -3594,6 +3614,51 @@
#undef REG
}
+#ifdef PCI_IOV
+/*
+ * Build the devinfo for one SR-IOV virtual function of the physical
+ * function 'pf' and add it as a child of 'bus'.  'size' is the size of the
+ * devinfo structure the calling bus subclass uses and must match the size
+ * recorded for the PF; 'rid' is the VF's routing ID and 'vid'/'did' the
+ * vendor and device IDs to record for it.  Returns the new device, or NULL
+ * if the size check fails.
+ */
+device_t
+pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
+    uint16_t vid, uint16_t did)
+{
+	struct pci_devinfo *parent_info, *child_info;
+	device_t bridge;
+	int vf_bus, vf_slot, vf_func, domain;
+
+	/*
+	 * Sanity check the size we were handed.  A mismatch most likely
+	 * means the pci subclass has not implemented the
+	 * pci_create_iov_child method the way it is supposed to.
+	 */
+	parent_info = device_get_ivars(pf);
+	if (parent_info->cfg.devinfo_size != size) {
+		device_printf(pf,
+		    "PCI subclass does not properly implement PCI_IOV\n");
+		return (NULL);
+	}
+
+	/* Translate the routing ID into a bus/slot/function triple. */
+	bridge = device_get_parent(bus);
+	PCIB_DECODE_RID(bridge, rid, &vf_bus, &vf_slot, &vf_func);
+
+	domain = pci_get_domain(bridge);
+	child_info = pci_fill_devinfo(bridge, domain, vf_bus, vf_slot, vf_func,
+	    vid, did, size);
+
+	/* Mark the function as a VF before it is added to the bus. */
+	child_info->cfg.flags |= PCICFG_VF;
+	pci_add_child(bus, child_info);
+
+	return (child_info->cfg.dev);
+}
+
+/*
+ * Implementation of the create_iov_child method for the plain PCI bus: the
+ * VF's devinfo is an unextended struct pci_devinfo.
+ */
+device_t
+pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
+    uint16_t vid, uint16_t did)
+{
+	device_t vf;
+
+	vf = pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
+	    did);
+	return (vf);
+}
+#endif
+
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
@@ -4523,7 +4588,7 @@
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+ u_long start, u_long end, u_long count, u_int num, u_int flags)
{
struct pci_devinfo *dinfo = device_get_ivars(child);
struct resource_list *rl = &dinfo->resources;
@@ -4547,7 +4612,7 @@
* have a atapci device in legacy mode and it fails
* here, that other code is broken.
*/
- pci_read_bar(child, *rid, &map, &testval);
+ pci_read_bar(child, *rid, &map, &testval, NULL);
/*
* Determine the size of the BAR and ignore BARs with a size
@@ -4620,8 +4685,8 @@
}
struct resource *
-pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
- u_long start, u_long end, u_long count, u_int flags)
+pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid,
+ u_long start, u_long end, u_long count, u_long num, u_int flags)
{
struct pci_devinfo *dinfo;
struct resource_list *rl;
@@ -4689,7 +4754,7 @@
rle = resource_list_find(rl, type, *rid);
if (rle == NULL) {
res = pci_reserve_map(dev, child, type, rid, start, end,
- count, flags);
+ count, num, flags);
if (res == NULL)
return (NULL);
}
@@ -4698,6 +4763,38 @@
start, end, count, flags));
}
+/*
+ * Bus alloc_resource entry point.  Requests from devices that are not our
+ * direct children are passed up to our parent.  With PCI_IOV, memory
+ * requests from SR-IOV VFs are served by pci_vf_alloc_mem_resource() and
+ * I/O port requests from VFs are refused.  Everything else goes through
+ * pci_alloc_multi_resource() asking for a single instance of the resource.
+ */
+struct resource *
+pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
+    u_long start, u_long end, u_long count, u_int flags)
+{
+#ifdef PCI_IOV
+	struct pci_devinfo *vf_info;
+#endif
+
+	if (dev != device_get_parent(child))
+		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
+		    type, rid, start, end, count, flags));
+
+#ifdef PCI_IOV
+	vf_info = device_get_ivars(child);
+	if ((vf_info->cfg.flags & PCICFG_VF) != 0) {
+		/* VFs can't have I/O BARs. */
+		if (type == SYS_RES_IOPORT)
+			return (NULL);
+
+		if (type == SYS_RES_MEMORY)
+			return (pci_vf_alloc_mem_resource(dev, child, rid,
+			    start, end, count, flags));
+
+		/* Any other resource type takes the regular path below. */
+	}
+#endif
+
+	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
+	    count, 1, flags));
+}
+
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
struct resource *r)
@@ -4712,6 +4809,22 @@
dinfo = device_get_ivars(child);
cfg = &dinfo->cfg;
+
+#ifdef PCI_IOV
+ if (dinfo->cfg.flags & PCICFG_VF) {
+ switch (type) {
+ /* VFs can't have I/O BARs. */
+ case SYS_RES_IOPORT:
+ return (EDOOFUS);
+ case SYS_RES_MEMORY:
+ return (pci_vf_release_mem_resource(dev, child, rid,
+ r));
+ }
+
+ /* Fall through for other types of resource allocations. */
+ }
+#endif
+
#ifdef NEW_PCIB
/*
* PCI-PCI bridge I/O window resources are not BARs. For
@@ -4874,6 +4987,38 @@
struct pci_devinfo *dinfo = device_get_ivars(child);
pcicfgregs *cfg = &dinfo->cfg;
+#ifdef PCI_IOV
+ /*
+ * SR-IOV VFs don't implement the VID or DID registers, so we have to
+ * emulate them here.
+ */
+ if (cfg->flags & PCICFG_VF) {
+ if (reg == PCIR_VENDOR) {
+ switch (width) {
+ case 4:
+ return (cfg->device << 16 | cfg->vendor);
+ case 2:
+ return (cfg->vendor);
+ case 1:
+ return (cfg->vendor & 0xff);
+ default:
+ return (0xffffffff);
+ }
+ } else if (reg == PCIR_DEVICE) {
+ switch (width) {
+ /* Note that an unaligned 4-byte read is an error. */
+ case 2:
+ return (cfg->device);
+ case 1:
+ return (cfg->device & 0xff);
+ default:
+ return (0xffffffff);
+ }
+ }
+ }
+#endif
+
+
return (PCIB_READ_CONFIG(device_get_parent(dev),
cfg->bus, cfg->slot, cfg->func, reg, width));
}
Index: sys/dev/pci/pci_if.m
===================================================================
--- sys/dev/pci/pci_if.m
+++ sys/dev/pci/pci_if.m
@@ -36,8 +36,19 @@
{
return (0);
}
+
+ /*
+ * Default implementation of the create_iov_child method: report that the
+ * bus does not implement PCI_IOV and create nothing.
+ */
+ static device_t
+ null_create_iov_child(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did)
+ {
+ device_printf(bus, "PCI_IOV not implemented on this bus.\n");
+ return (NULL);
+ }
};
+HEADER {
+ struct nvlist;
+}
METHOD u_int32_t read_config {
device_t dev;
@@ -165,3 +176,22 @@
device_t child;
};
+#
+# Register 'child' for SR-IOV configuration.  The pci_iov_attach() wrapper
+# in pci_iov.h invokes this with 'dev' set to the parent of the calling
+# device and 'child' set to the calling device itself.  'pf_schema' and
+# 'vf_schema' are nvlists supplied by the caller.
+#
+METHOD int iov_attach {
+ device_t dev;
+ device_t child;
+ struct nvlist *pf_schema;
+ struct nvlist *vf_schema;
+};
+
+#
+# Counterpart of iov_attach.  The pci_iov_detach() wrapper in pci_iov.h
+# invokes this with 'dev' set to the parent of the calling device and
+# 'child' set to the calling device itself.
+#
+METHOD int iov_detach {
+ device_t dev;
+ device_t child;
+};
+
+#
+# Create the child device for one VF of the physical function 'pf' on
+# 'bus'.  'rid' is the VF's routing ID; 'vid' and 'did' are the vendor and
+# device IDs to use for it.  The default implementation prints a message
+# and returns NULL.
+#
+METHOD device_t create_iov_child {
+ device_t bus;
+ device_t pf;
+ uint16_t rid;
+ uint16_t vid;
+ uint16_t did;
+} DEFAULT null_create_iov_child;
Index: sys/dev/pci/pci_iov.h
===================================================================
--- /dev/null
+++ sys/dev/pci/pci_iov.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/pci/pci_iov.h 283670 2015-05-28 22:01:50Z jhb $
+ */
+
+#ifndef _PCI_IOV_H_
+#define _PCI_IOV_H_
+
+#include "pci_iov_if.h"
+
+struct nvlist;
+
+/*
+ * Convenience wrapper for drivers: invoke the iov_attach method of the
+ * parent bus on behalf of 'dev', handing over the two schemas.
+ */
+static __inline int
+pci_iov_attach(device_t dev, struct nvlist *pf_schema, struct nvlist *vf_schema)
+{
+	device_t bus;
+
+	bus = device_get_parent(dev);
+	return (PCI_IOV_ATTACH(bus, dev, pf_schema, vf_schema));
+}
+
+/*
+ * Convenience wrapper for drivers: invoke the iov_detach method of the
+ * parent bus on behalf of 'dev'.
+ */
+static __inline int
+pci_iov_detach(device_t dev)
+{
+	device_t bus;
+
+	bus = device_get_parent(dev);
+	return (PCI_IOV_DETACH(bus, dev));
+}
+
+#endif /* !_PCI_IOV_H_ */
Index: sys/dev/pci/pci_iov.c
===================================================================
--- /dev/null
+++ sys/dev/pci/pci_iov.c
@@ -0,0 +1,980 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov.c 283670 2015-05-28 22:01:50Z jhb $");
+
+#include "opt_bus.h"
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/iov.h>
+#include <sys/linker.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <machine/bus.h>
+#include <machine/stdarg.h>
+
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pci_iov.h>
+#include <dev/pci/pci_private.h>
+#include <dev/pci/pci_iov_private.h>
+#include <dev/pci/schema_private.h>
+
+#include "pcib_if.h"
+
+static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");
+
+static d_ioctl_t pci_iov_ioctl;
+
+/*
+ * Character device switch for the per-PF control node created in
+ * pci_iov_attach_method().  Only ioctl is provided.
+ */
+static struct cdevsw iov_cdevsw = {
+ .d_version = D_VERSION,
+ .d_name = "iov",
+ .d_ioctl = pci_iov_ioctl
+};
+
+SYSCTL_DECL(_hw_pci);
+
+/*
+ * The maximum amount of memory we will allocate for user configuration of an
+ * SR-IOV device. 1MB ought to be enough for anyone, but leave this
+ * configurable just in case.
+ */
+static u_long pci_iov_max_config = 1024 * 1024;
+SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
+ &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
+
+
+/*
+ * Read or write a register inside the SR-IOV extended capability of the PF
+ * described by the struct pci_devinfo pointer 'd'.  'r' is the register
+ * offset relative to the start of the capability, 'w' is the access width
+ * passed to pci_read_config()/pci_write_config(), and 'v' is the value to
+ * write.  Every argument is parenthesized so that expression arguments
+ * expand safely.
+ */
+#define	IOV_READ(d, r, w) \
+	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (w))
+
+#define	IOV_WRITE(d, r, v, w) \
+	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (v), (w))
+
+static nvlist_t *pci_iov_build_schema(nvlist_t **pf_schema,
+ nvlist_t **vf_schema);
+static void pci_iov_build_pf_schema(nvlist_t *schema,
+ nvlist_t **driver_schema);
+static void pci_iov_build_vf_schema(nvlist_t *schema,
+ nvlist_t **driver_schema);
+static nvlist_t *pci_iov_get_pf_subsystem_schema(void);
+static nvlist_t *pci_iov_get_vf_subsystem_schema(void);
+
+/*
+ * Implementation of the iov_attach method.  Verifies that 'dev' has a
+ * version 1 SR-IOV extended capability, combines the driver supplied
+ * 'pf_schema' and 'vf_schema' with the subsystem schema, validates the
+ * result and creates the "iov/<nameunit>" control device.
+ *
+ * This function always consumes 'pf_schema' and 'vf_schema': on success
+ * they become part of the schema stored in the new pcicfg_iov, on every
+ * failure path (including the early capability checks) they are destroyed
+ * here.
+ *
+ * Returns 0 on success, the error from pci_find_extcap() if the capability
+ * is absent, ENXIO for an unsupported capability version, EBUSY if the
+ * device is already attached, ENOMEM if the schema or the device node
+ * cannot be created, or the error from pci_iov_validate_schema().
+ */
+int
+pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
+    nvlist_t *vf_schema)
+{
+	struct pci_devinfo *dinfo;
+	struct pcicfg_iov *iov;
+	nvlist_t *schema;
+	uint32_t version;
+	int error;
+	int iov_pos;
+
+	dinfo = device_get_ivars(dev);
+	schema = NULL;
+
+	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
+
+	if (error != 0)
+		goto free_schemas;
+
+	version = pci_read_config(dev, iov_pos, 4);
+	if (PCI_EXTCAP_VER(version) != 1) {
+		if (bootverbose)
+			device_printf(dev,
+			    "Unsupported version of SR-IOV (%d) detected\n",
+			    PCI_EXTCAP_VER(version));
+
+		error = ENXIO;
+		goto free_schemas;
+	}
+
+	iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);
+
+	mtx_lock(&Giant);
+	if (dinfo->cfg.iov != NULL) {
+		error = EBUSY;
+		goto cleanup;
+	}
+	iov->iov_pos = iov_pos;
+
+	/* On return pf_schema and vf_schema have been set to NULL. */
+	schema = pci_iov_build_schema(&pf_schema, &vf_schema);
+	if (schema == NULL) {
+		error = ENOMEM;
+		goto cleanup;
+	}
+
+	error = pci_iov_validate_schema(schema);
+	if (error != 0)
+		goto cleanup;
+	iov->iov_schema = schema;
+
+	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
+	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev));
+
+	if (iov->iov_cdev == NULL) {
+		error = ENOMEM;
+		goto cleanup;
+	}
+
+	dinfo->cfg.iov = iov;
+	iov->iov_cdev->si_drv1 = dinfo;
+	mtx_unlock(&Giant);
+
+	return (0);
+
+cleanup:
+	nvlist_destroy(schema);
+	free(iov, M_SRIOV);
+	mtx_unlock(&Giant);
+free_schemas:
+	/*
+	 * Also reached from the checks made before Giant is taken, so that
+	 * the caller's schemas are not leaked on those paths.
+	 */
+	nvlist_destroy(pf_schema);
+	nvlist_destroy(vf_schema);
+	return (error);
+}
+
+/*
+ * Implementation of the iov_detach method.  Tears down the SR-IOV state
+ * that pci_iov_attach_method() created for 'dev': the control device, the
+ * schema and the pcicfg_iov itself.  Returns 0 on success or if the device
+ * was never attached, and EBUSY if VFs are still configured or another
+ * operation has the state marked busy.
+ */
+int
+pci_iov_detach_method(device_t bus, device_t dev)
+{
+ struct pci_devinfo *dinfo;
+ struct pcicfg_iov *iov;
+
+ mtx_lock(&Giant);
+ dinfo = device_get_ivars(dev);
+ iov = dinfo->cfg.iov;
+
+ /* Nothing was attached, so there is nothing to undo. */
+ if (iov == NULL) {
+ mtx_unlock(&Giant);
+ return (0);
+ }
+
+ if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) {
+ mtx_unlock(&Giant);
+ return (EBUSY);
+ }
+
+ dinfo->cfg.iov = NULL;
+
+ if (iov->iov_cdev) {
+ destroy_dev(iov->iov_cdev);
+ iov->iov_cdev = NULL;
+ }
+ nvlist_destroy(iov->iov_schema);
+
+ free(iov, M_SRIOV);
+ mtx_unlock(&Giant);
+
+ return (0);
+}
+
+/*
+ * Build the complete configuration schema from the driver supplied PF and
+ * VF schemas.  Both *pf and *vf are set to NULL on entry to this function's
+ * work, so the caller no longer owns them whatever the outcome.  Returns
+ * the new schema, or NULL on failure, in which case everything built so far
+ * is destroyed.
+ */
+static nvlist_t *
+pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
+{
+ nvlist_t *schema, *pf_driver, *vf_driver;
+
+ /* We always take ownership of the schemas. */
+ pf_driver = *pf;
+ *pf = NULL;
+ vf_driver = *vf;
+ *vf = NULL;
+
+ schema = pci_iov_schema_alloc_node();
+ if (schema == NULL)
+ goto cleanup;
+
+ /*
+ * Each helper sets the driver schema pointer to NULL once it has moved
+ * the schema into the tree, so the cleanup path only destroys what was
+ * not consumed.
+ */
+ pci_iov_build_pf_schema(schema, &pf_driver);
+ pci_iov_build_vf_schema(schema, &vf_driver);
+
+ if (nvlist_error(schema) != 0)
+ goto cleanup;
+
+ return (schema);
+
+cleanup:
+ nvlist_destroy(schema);
+ nvlist_destroy(pf_driver);
+ nvlist_destroy(vf_driver);
+ return (NULL);
+}
+
+/*
+ * Add the PF section to 'schema' under PF_CONFIG_NAME.  The section holds
+ * the driver's schema under DRIVER_CONFIG_NAME and the subsystem's PF
+ * schema under IOV_CONFIG_NAME.  *driver_schema is set to NULL once it has
+ * been moved into the section.  If the section node cannot be allocated,
+ * 'schema' is put into the ENOMEM error state and *driver_schema is left
+ * untouched.
+ */
+static void
+pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
+{
+ nvlist_t *pf_schema, *iov_schema;
+
+ pf_schema = pci_iov_schema_alloc_node();
+ if (pf_schema == NULL) {
+ nvlist_set_error(schema, ENOMEM);
+ return;
+ }
+
+ iov_schema = pci_iov_get_pf_subsystem_schema();
+
+ /*
+ * Note that if either *driver_schema or iov_schema is NULL, then
+ * nvlist_move_nvlist will put the schema in the error state and
+ * SR-IOV will fail to initialize later, so we don't have to explicitly
+ * handle that case.
+ */
+ nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
+ nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
+ nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
+ *driver_schema = NULL;
+}
+
+/*
+ * Add the VF section to 'schema' under VF_SCHEMA_NAME.  The section holds
+ * the driver's schema under DRIVER_CONFIG_NAME and the subsystem's VF
+ * schema under IOV_CONFIG_NAME.  *driver_schema is set to NULL once it has
+ * been moved into the section.  If the section node cannot be allocated,
+ * 'schema' is put into the ENOMEM error state and *driver_schema is left
+ * untouched.
+ */
+static void
+pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
+{
+ nvlist_t *vf_schema, *iov_schema;
+
+ vf_schema = pci_iov_schema_alloc_node();
+ if (vf_schema == NULL) {
+ nvlist_set_error(schema, ENOMEM);
+ return;
+ }
+
+ iov_schema = pci_iov_get_vf_subsystem_schema();
+
+ /*
+ * Note that if either *driver_schema or iov_schema is NULL, then
+ * nvlist_move_nvlist will put the schema in the error state and
+ * SR-IOV will fail to initialize later, so we don't have to explicitly
+ * handle that case.
+ */
+ nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
+ nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
+ nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
+ *driver_schema = NULL;
+}
+
+/*
+ * Allocate the schema node describing the PF parameters owned by the
+ * SR-IOV subsystem itself: the required "num_vfs" and "device" entries.
+ * Returns NULL if the node cannot be allocated.
+ */
+static nvlist_t *
+pci_iov_get_pf_subsystem_schema(void)
+{
+	nvlist_t *node;
+
+	node = pci_iov_schema_alloc_node();
+	if (node != NULL) {
+		pci_iov_schema_add_uint16(node, "num_vfs",
+		    IOV_SCHEMA_REQUIRED, -1);
+		pci_iov_schema_add_string(node, "device",
+		    IOV_SCHEMA_REQUIRED, NULL);
+	}
+
+	return (node);
+}
+
+/*
+ * Allocate the schema node describing the VF parameters owned by the
+ * SR-IOV subsystem itself: the "passthrough" boolean, which defaults to
+ * false.  Returns NULL if the node cannot be allocated.
+ */
+static nvlist_t *
+pci_iov_get_vf_subsystem_schema(void)
+{
+	nvlist_t *node;
+
+	node = pci_iov_schema_alloc_node();
+	if (node != NULL)
+		pci_iov_schema_add_bool(node, "passthrough",
+		    IOV_SCHEMA_HASDEFAULT, 0);
+
+	return (node);
+}
+
+/*
+ * Allocate the memory behind VF BAR number 'bar' of the PF described by
+ * 'dinfo' and hand the resulting range to the PF's VF resource manager.
+ * 'bar_shift' is log2 of the size of the BAR for a single VF; the
+ * allocation requests iov_num_vfs instances of it.
+ *
+ * Returns 0 on success, ENXIO if the resource cannot be allocated, or the
+ * error from rman_manage_region().  The resource is recorded in
+ * iov->iov_bar[] before rman_manage_region() is called, so the caller's
+ * cleanup releases it if that last step fails.
+ */
+static int
+pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
+{
+	struct resource *res;
+	struct pcicfg_iov *iov;
+	device_t dev, bus;
+	u_long start, end;
+	pci_addr_t bar_size;
+	int rid;
+
+	iov = dinfo->cfg.iov;
+	dev = dinfo->cfg.dev;
+	bus = device_get_parent(dev);
+	rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
+
+	/*
+	 * Do the shift in pci_addr_t.  A plain "1 << bar_shift" is computed
+	 * as an int and is undefined once bar_shift reaches 31.
+	 */
+	bar_size = (pci_addr_t)1 << bar_shift;
+
+	res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul,
+	    ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE);
+
+	if (res == NULL)
+		return (ENXIO);
+
+	iov->iov_bar[bar].res = res;
+	iov->iov_bar[bar].bar_size = bar_size;
+	iov->iov_bar[bar].bar_shift = bar_shift;
+
+	start = rman_get_start(res);
+	end = rman_get_end(res);
+	return (rman_manage_region(&iov->rman, start, end));
+}
+
+/*
+ * Record the BARs of the VF described by 'dinfo'.  For every VF BAR that
+ * was allocated on the PF, the VF's slice starts vf.index * bar_size bytes
+ * past the start of the PF's allocation.
+ */
+static void
+pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
+{
+	struct pci_iov_bar *pf_bar;
+	uint64_t vf_base;
+	int n;
+
+	for (n = 0; n <= PCIR_MAX_BAR_0; n++) {
+		pf_bar = &iov->iov_bar[n];
+
+		/* BAR not implemented or not allocated on the PF. */
+		if (pf_bar->res == NULL)
+			continue;
+
+		vf_base = rman_get_start(pf_bar->res) +
+		    dinfo->cfg.vf.index * pf_bar->bar_size;
+
+		pci_add_bar(dinfo->cfg.dev, PCIR_BAR(n), vf_base,
+		    pf_bar->bar_shift);
+	}
+}
+
+/*
+ * Copy the packed configuration described by 'arg' in from userland, unpack
+ * it into an nvlist and validate it against iov->iov_schema.  On success 0
+ * is returned and *ret holds the configuration, which the caller must
+ * destroy.  Returns EMSGSIZE if the packed configuration is larger than
+ * pci_iov_max_config, EINVAL if it cannot be unpacked, or the error from
+ * copyin(), schema validation or the nvlist itself.
+ */
+static int
+pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
+ nvlist_t **ret)
+{
+ void *packed_config;
+ nvlist_t *config;
+ int error;
+
+ config = NULL;
+ packed_config = NULL;
+
+ /* Bound the allocation below; the length comes from the ioctl caller. */
+ if (arg->len > pci_iov_max_config) {
+ error = EMSGSIZE;
+ goto out;
+ }
+
+ packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
+
+ error = copyin(arg->config, packed_config, arg->len);
+ if (error != 0)
+ goto out;
+
+ config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE);
+ if (config == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = pci_iov_schema_validate_config(iov->iov_schema, config);
+ if (error != 0)
+ goto out;
+
+ error = nvlist_error(config);
+ if (error != 0)
+ goto out;
+
+ /* Hand the nvlist to the caller; clear config so it is not freed below. */
+ *ret = config;
+ config = NULL;
+
+out:
+ nvlist_destroy(config);
+ free(packed_config, M_SRIOV);
+ return (error);
+}
+
+/*
+ * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
+ * capability.  This bit is only writeable on the lowest-numbered PF but
+ * affects all PFs on the device.
+ *
+ * 'bus' is the PCI bus whose children are searched.  Returns 0 on success,
+ * or if ARI is not enabled on the parent bridge (in which case nothing is
+ * done), and the error from device_get_children() otherwise.
+ */
+static int
+pci_iov_set_ari(device_t bus)
+{
+	device_t lowest;
+	device_t *devlist;
+	int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
+	uint16_t iov_ctl;
+
+	/* If ARI is disabled on the downstream port there is nothing to do. */
+	if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
+		return (0);
+
+	error = device_get_children(bus, &devlist, &devcount);
+
+	if (error != 0)
+		return (error);
+
+	lowest = NULL;
+	lowest_func = 0;
+	lowest_pos = 0;
+	for (i = 0; i < devcount; i++) {
+		if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
+			dev_func = pci_get_function(devlist[i]);
+			if (lowest == NULL || dev_func < lowest_func) {
+				lowest = devlist[i];
+				lowest_func = dev_func;
+				lowest_pos = iov_pos;
+			}
+		}
+	}
+
+	/*
+	 * If we called this function some device must have the SR-IOV
+	 * capability.
+	 */
+	KASSERT(lowest != NULL,
+	    ("Could not find child of %s with SR-IOV capability",
+	    device_get_nameunit(bus)));
+
+	/*
+	 * Use the capability offset recorded for the lowest function.
+	 * iov_pos holds whatever the most recent pci_find_extcap() call
+	 * stored, which belongs to a different function whenever the lowest
+	 * one is not the last SR-IOV capable child in the list.
+	 */
+	iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
+	iov_ctl |= PCIM_SRIOV_ARI_EN;
+	pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
+	free(devlist, M_TEMP);
+	return (0);
+}
+
+/*
+ * Program the SR-IOV system page size register of the PF to match the
+ * system page size.  Returns ENXIO if the device does not advertise
+ * support for that size, 0 otherwise.
+ */
+static int
+pci_iov_config_page_size(struct pci_devinfo *dinfo)
+{
+	uint32_t supported, wanted;
+	int shift;
+
+	supported = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);
+
+	/*
+	 * Each bit of the register selects a page size, starting at
+	 * PCI_SRIOV_BASE_PAGE_SHIFT.  A system page smaller than the
+	 * smallest SR-IOV page is rounded up to that smallest size (bit 0).
+	 */
+	shift = 0;
+	if (PAGE_SHIFT >= PCI_SRIOV_BASE_PAGE_SHIFT)
+		shift = PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT;
+	wanted = (1 << shift);
+
+	/* Check that the device supports the system page size. */
+	if ((wanted & supported) == 0)
+		return (ENXIO);
+
+	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, wanted, 4);
+	return (0);
+}
+
+/*
+ * Call the PF driver's init method with the number of VFs being created and
+ * the driver specific part of the PF section of 'config'.  Returns the
+ * method's result.
+ */
+static int
+pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config)
+{
+ const nvlist_t *device, *driver_config;
+
+ device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
+ driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
+ return (PCI_IOV_INIT(dev, num_vfs, driver_config));
+}
+
+/*
+ * Initialize the resource manager that hands out VF BAR memory for the PF
+ * 'pf'.  The manager starts out with no regions; pci_iov_alloc_bar() adds
+ * them.  On success IOV_RMAN_INITED is set in iov->iov_flags so that the
+ * teardown paths know to call rman_fini().  Returns 0 or the error from
+ * rman_init().
+ */
+static int
+pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
+{
+ int error;
+
+ iov->rman.rm_start = 0;
+ iov->rman.rm_end = ~0ul;
+ iov->rman.rm_type = RMAN_ARRAY;
+ snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
+ device_get_nameunit(pf));
+ iov->rman.rm_descr = iov->rman_name;
+
+ error = rman_init(&iov->rman);
+ if (error != 0)
+ return (error);
+
+ iov->iov_flags |= IOV_RMAN_INITED;
+ return (0);
+}
+
+/*
+ * Probe every VF BAR register in the PF's SR-IOV capability and allocate
+ * memory for each one that is implemented.  Returns 0 on success or the
+ * first error reported by pci_iov_alloc_bar().
+ */
+static int
+pci_iov_setup_bars(struct pci_devinfo *dinfo)
+{
+	struct pcicfg_iov *iov;
+	device_t pf;
+	pci_addr_t value, probe;
+	int n, is_64, error;
+
+	iov = dinfo->cfg.iov;
+	pf = dinfo->cfg.dev;
+	is_64 = 0;
+
+	for (n = 0; n <= PCIR_MAX_BAR_0; n++) {
+		/*
+		 * A 64-bit BAR occupies two consecutive registers.  If the
+		 * previous register started a 64-bit BAR then this one is
+		 * its upper half and must not be probed on its own.
+		 */
+		if (is_64) {
+			is_64 = 0;
+			continue;
+		}
+
+		pci_read_bar(pf, iov->iov_pos + PCIR_SRIOV_BAR(n), &value,
+		    &probe, &is_64);
+
+		/* A zero test value means the BAR is not implemented. */
+		if (probe == 0)
+			continue;
+
+		error = pci_iov_alloc_bar(dinfo, n, pci_mapsize(probe));
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Create a child device for each of the iov_num_vfs VFs of the PF described
+ * by 'dinfo'.  VF i gets routing ID first_rid + i * rid_stride and is
+ * configured from the VF_PREFIX"<i>" section of 'config'.  Enumeration
+ * stops at the first VF whose device cannot be created; a VF rejected by
+ * the PF driver's add_vf method is deleted and enumeration continues.
+ * Finally the bus is asked to attach drivers to the new children.
+ */
+static void
+pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
+ uint16_t first_rid, uint16_t rid_stride)
+{
+ char device_name[VF_MAX_NAME];
+ const nvlist_t *device, *driver_config, *iov_config;
+ device_t bus, dev, vf;
+ struct pcicfg_iov *iov;
+ struct pci_devinfo *vfinfo;
+ size_t size;
+ int i, error;
+ uint16_t vid, did, next_rid;
+
+ iov = dinfo->cfg.iov;
+ dev = dinfo->cfg.dev;
+ bus = device_get_parent(dev);
+ size = dinfo->cfg.devinfo_size;
+ next_rid = first_rid;
+ /* VFs use the PF's vendor ID and the device ID from the capability. */
+ vid = pci_get_vendor(dev);
+ did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
+
+ for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
+ snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
+ device = nvlist_get_nvlist(config, device_name);
+ iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
+ driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
+
+ vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
+ if (vf == NULL)
+ break;
+
+ /*
+ * If we are creating passthrough devices then force the ppt
+ * driver to attach to prevent a VF driver from claiming the
+ * VFs.
+ */
+ if (nvlist_get_bool(iov_config, "passthrough"))
+ device_set_devclass_fixed(vf, "ppt");
+
+ vfinfo = device_get_ivars(vf);
+
+ /* Point the VF back at its PF's SR-IOV state. */
+ vfinfo->cfg.iov = iov;
+ vfinfo->cfg.vf.index = i;
+
+ pci_iov_add_bars(iov, vfinfo);
+
+ error = PCI_IOV_ADD_VF(dev, i, driver_config);
+ if (error != 0) {
+ device_printf(dev, "Failed to add VF %d\n", i);
+ pci_delete_child(bus, vf);
+ }
+ }
+
+ bus_generic_attach(bus);
+}
+
+/*
+ * Handler for the IOV_CONFIG ioctl: parse and validate the configuration
+ * supplied in 'arg', initialize the PF driver, program the SR-IOV
+ * capability, allocate the VF BARs, enable the VFs and enumerate them.
+ *
+ * Returns 0 on success.  Returns EBUSY if VFs already exist or another
+ * operation is in progress, EINVAL if more VFs are requested than the
+ * device's TotalVFs, ENOSPC if the VFs would need an additional bus number,
+ * or the error from whichever step failed.  On failure the BARs and the
+ * resource manager set up so far are released and the PF driver's uninit
+ * method is called if its init method had succeeded.
+ */
+static int
+pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
+{
+ device_t bus, dev;
+ struct pci_devinfo *dinfo;
+ struct pcicfg_iov *iov;
+ nvlist_t *config;
+ int i, error;
+ uint16_t rid_off, rid_stride;
+ uint16_t first_rid, last_rid;
+ uint16_t iov_ctl;
+ uint16_t num_vfs, total_vfs;
+ int iov_inited;
+
+ mtx_lock(&Giant);
+ dinfo = cdev->si_drv1;
+ iov = dinfo->cfg.iov;
+ dev = dinfo->cfg.dev;
+ bus = device_get_parent(dev);
+ iov_inited = 0;
+ config = NULL;
+
+ if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
+ mtx_unlock(&Giant);
+ return (EBUSY);
+ }
+ /* Mark the state busy for the duration of the configuration. */
+ iov->iov_flags |= IOV_BUSY;
+
+ error = pci_iov_parse_config(iov, arg, &config);
+ if (error != 0)
+ goto out;
+
+ num_vfs = pci_iov_config_get_num_vfs(config);
+ total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+ if (num_vfs > total_vfs) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = pci_iov_config_page_size(dinfo);
+ if (error != 0)
+ goto out;
+
+ error = pci_iov_set_ari(bus);
+ if (error != 0)
+ goto out;
+
+ error = pci_iov_init(dev, num_vfs, config);
+ if (error != 0)
+ goto out;
+ iov_inited = 1;
+
+ IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);
+
+ /* The offset and stride are read after NumVFs has been written. */
+ rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
+ rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
+
+ first_rid = pci_get_rid(dev) + rid_off;
+ last_rid = first_rid + (num_vfs - 1) * rid_stride;
+
+ /* We don't yet support allocating extra bus numbers for VFs. */
+ if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
+ error = ENOSPC;
+ goto out;
+ }
+
+ /* Make sure the VFs are disabled while the BARs are set up. */
+ iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
+ iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
+ IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
+
+ error = pci_iov_init_rman(dev, iov);
+ if (error != 0)
+ goto out;
+
+ /* pci_iov_alloc_bar() sizes its allocations from iov_num_vfs. */
+ iov->iov_num_vfs = num_vfs;
+
+ error = pci_iov_setup_bars(dinfo);
+ if (error != 0)
+ goto out;
+
+ iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
+ iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
+ IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
+
+ /* Per specification, we must wait 100ms before accessing VFs. */
+ /*
+ * NOTE(review): roundup(hz, 10) looks like it rounds hz up to a
+ * multiple of 10 ticks (about one second) rather than computing a
+ * tenth of a second; confirm whether howmany(hz, 10) was intended.
+ */
+ pause("iov", roundup(hz, 10));
+ pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);
+
+ nvlist_destroy(config);
+ iov->iov_flags &= ~IOV_BUSY;
+ mtx_unlock(&Giant);
+
+ return (0);
+out:
+ if (iov_inited)
+ PCI_IOV_UNINIT(dev);
+
+ /* Release whichever VF BARs were allocated before the failure. */
+ for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+ if (iov->iov_bar[i].res != NULL) {
+ pci_release_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i),
+ iov->iov_bar[i].res);
+ pci_delete_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i));
+ iov->iov_bar[i].res = NULL;
+ }
+ }
+
+ if (iov->iov_flags & IOV_RMAN_INITED) {
+ rman_fini(&iov->rman);
+ iov->iov_flags &= ~IOV_RMAN_INITED;
+ }
+
+ nvlist_destroy(config);
+ iov->iov_num_vfs = 0;
+ iov->iov_flags &= ~IOV_BUSY;
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+/*
+ * Return non-zero if 'child' is a VF that belongs to the PF whose SR-IOV
+ * state is 'pf', and zero otherwise.
+ */
+static int
+pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
+{
+	struct pci_devinfo *info;
+
+	info = device_get_ivars(child);
+
+	/* Only devices flagged as VFs carry a meaningful cfg.iov back-link. */
+	return ((info->cfg.flags & PCICFG_VF) != 0 && info->cfg.iov == pf);
+}
+
+/*
+ * Handler for the IOV_DELETE ioctl: detach and delete every VF of the PF
+ * behind 'cdev', call the PF driver's uninit method, disable the VFs in
+ * the SR-IOV capability and release the VF BARs and resource manager.
+ *
+ * Returns 0 on success, EBUSY if another operation is in progress, ECHILD
+ * if no VFs are configured, or the error from device_get_children() or
+ * device_detach().  If any VF fails to detach nothing is deleted.
+ */
+static int
+pci_iov_delete(struct cdev *cdev)
+{
+ device_t bus, dev, vf, *devlist;
+ struct pci_devinfo *dinfo;
+ struct pcicfg_iov *iov;
+ int i, error, devcount;
+ uint32_t iov_ctl;
+
+ mtx_lock(&Giant);
+ dinfo = cdev->si_drv1;
+ iov = dinfo->cfg.iov;
+ dev = dinfo->cfg.dev;
+ bus = device_get_parent(dev);
+ devlist = NULL;
+
+ if (iov->iov_flags & IOV_BUSY) {
+ mtx_unlock(&Giant);
+ return (EBUSY);
+ }
+
+ if (iov->iov_num_vfs == 0) {
+ mtx_unlock(&Giant);
+ return (ECHILD);
+ }
+
+ iov->iov_flags |= IOV_BUSY;
+
+ error = device_get_children(bus, &devlist, &devcount);
+
+ if (error != 0)
+ goto out;
+
+ /* First pass: detach every VF, bailing out before anything is deleted. */
+ for (i = 0; i < devcount; i++) {
+ vf = devlist[i];
+
+ if (!pci_iov_is_child_vf(iov, vf))
+ continue;
+
+ error = device_detach(vf);
+ if (error != 0) {
+ device_printf(dev,
+ "Could not disable SR-IOV: failed to detach VF %s\n",
+ device_get_nameunit(vf));
+ goto out;
+ }
+ }
+
+ /* Second pass: every VF detached, so delete the child devices. */
+ for (i = 0; i < devcount; i++) {
+ vf = devlist[i];
+
+ if (pci_iov_is_child_vf(iov, vf))
+ pci_delete_child(bus, vf);
+ }
+ PCI_IOV_UNINIT(dev);
+
+ iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
+ iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
+ IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
+ IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);
+
+ iov->iov_num_vfs = 0;
+
+ for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+ if (iov->iov_bar[i].res != NULL) {
+ pci_release_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i),
+ iov->iov_bar[i].res);
+ pci_delete_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i));
+ iov->iov_bar[i].res = NULL;
+ }
+ }
+
+ if (iov->iov_flags & IOV_RMAN_INITED) {
+ rman_fini(&iov->rman);
+ iov->iov_flags &= ~IOV_RMAN_INITED;
+ }
+
+ error = 0;
+out:
+ free(devlist, M_TEMP);
+ iov->iov_flags &= ~IOV_BUSY;
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+/*
+ * Handler for the IOV_GET_SCHEMA ioctl: pack the PF's schema and copy it
+ * out to the buffer described by 'output'.  output->len is always updated
+ * to the size of the packed schema.  If the caller's buffer is too small
+ * nothing is copied and output->error is set to EMSGSIZE while the ioctl
+ * itself still returns 0.  Returns ENOMEM if the schema cannot be packed,
+ * or the error from copyout().
+ */
+static int
+pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
+{
+ struct pci_devinfo *dinfo;
+ void *packed;
+ size_t output_len, size;
+ int error;
+
+ packed = NULL;
+
+ mtx_lock(&Giant);
+ dinfo = cdev->si_drv1;
+ packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
+ mtx_unlock(&Giant);
+
+ if (packed == NULL) {
+ error = ENOMEM;
+ goto fail;
+ }
+
+ /* Remember the caller's buffer size before reporting the real one. */
+ output_len = output->len;
+ output->len = size;
+ if (size <= output_len) {
+ error = copyout(packed, output->schema, size);
+
+ if (error != 0)
+ goto fail;
+
+ output->error = 0;
+ } else
+ /*
+ * If we return an error then the ioctl code won't copyout
+ * output back to userland, so we flag the error in the struct
+ * instead.
+ */
+ output->error = EMSGSIZE;
+
+ error = 0;
+
+fail:
+ free(packed, M_NVLIST);
+
+ return (error);
+}
+
+/*
+ * ioctl entry point of the per-PF control device.  Dispatches IOV_CONFIG,
+ * IOV_DELETE and IOV_GET_SCHEMA to their handlers and rejects any other
+ * command with EINVAL.
+ */
+static int
+pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
+    struct thread *td)
+{
+	int error;
+
+	switch (cmd) {
+	case IOV_CONFIG:
+		error = pci_iov_config(dev, (struct pci_iov_arg *)data);
+		break;
+	case IOV_DELETE:
+		error = pci_iov_delete(dev);
+		break;
+	case IOV_GET_SCHEMA:
+		error = pci_iov_get_schema_ioctl(dev,
+		    (struct pci_iov_schema *)data);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Allocate the memory resource backing BAR '*rid' of the VF 'child'.  The
+ * address range comes from the BAR recorded for the VF and is reserved
+ * from the VF resource manager of its PF.  'start' and 'end' constrain the
+ * range and 'count' must be 1.  If RF_ACTIVE is set in 'flags' the resource
+ * is activated as well.
+ *
+ * Returns the resource, or NULL if the VF has no such BAR, the constraints
+ * cannot be met, or reservation, bookkeeping or activation fails.
+ */
+struct resource *
+pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start,
+    u_long end, u_long count, u_int flags)
+{
+	struct pci_devinfo *dinfo;
+	struct pcicfg_iov *iov;
+	struct pci_map *map;
+	struct resource *res;
+	struct resource_list_entry *rle;
+	u_long bar_start, bar_end;
+	pci_addr_t bar_length;
+	int error;
+
+	dinfo = device_get_ivars(child);
+	iov = dinfo->cfg.iov;
+
+	map = pci_find_bar(child, *rid);
+	if (map == NULL)
+		return (NULL);
+
+	/*
+	 * Do the shift in pci_addr_t.  A plain "1 << pm_size" is computed as
+	 * an int and is undefined once pm_size reaches 31.
+	 */
+	bar_length = (pci_addr_t)1 << map->pm_size;
+	bar_start = map->pm_value;
+	bar_end = bar_start + bar_length - 1;
+
+	/* Make sure that the resource fits the constraints. */
+	if (bar_start >= end || bar_end <= bar_start || count != 1)
+		return (NULL);
+
+	/* Clamp the resource to the constraints if necessary. */
+	if (bar_start < start)
+		bar_start = start;
+	if (bar_end > end)
+		bar_end = end;
+	bar_length = bar_end - bar_start + 1;
+
+	res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
+	    bar_length, flags, child);
+	if (res == NULL)
+		return (NULL);
+
+	rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
+	    bar_start, bar_end, 1);
+	if (rle == NULL) {
+		rman_release_resource(res);
+		return (NULL);
+	}
+
+	rman_set_rid(res, *rid);
+
+	if (flags & RF_ACTIVE) {
+		error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
+		if (error != 0) {
+			/* Undo the list entry and the reservation. */
+			resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
+			    *rid);
+			rman_release_resource(res);
+			return (NULL);
+		}
+	}
+	/* Only a fully set up resource is recorded in the list entry. */
+	rle->res = res;
+
+	return (res);
+}
+
+/*
+ * Release a memory resource obtained from pci_vf_alloc_mem_resource().  The
+ * resource is deactivated first if it is active, its resource list entry
+ * is removed, and it is handed back to the resource manager.  Returns the
+ * error from bus_deactivate_resource() if deactivation fails (nothing is
+ * released in that case), otherwise the result of rman_release_resource().
+ */
+int
+pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
+ struct resource *r)
+{
+ struct pci_devinfo *dinfo;
+ struct resource_list_entry *rle;
+ int error;
+
+ dinfo = device_get_ivars(child);
+
+ if (rman_get_flags(r) & RF_ACTIVE) {
+ error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
+ if (error != 0)
+ return (error);
+ }
+
+ rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
+ if (rle != NULL) {
+ /* Clear the back-pointer before deleting; r is released below. */
+ rle->res = NULL;
+ resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
+ rid);
+ }
+
+ return (rman_release_resource(r));
+}
+
Index: sys/dev/pci/pci_iov_if.m
===================================================================
--- /dev/null
+++ sys/dev/pci/pci_iov_if.m
@@ -0,0 +1,52 @@
+#-
+# Copyright (c) 2013-2015 Sandvine Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: head/sys/dev/pci/pci_iov_if.m 283670 2015-05-28 22:01:50Z jhb $
+#
+
+#include <sys/bus.h>
+
+INTERFACE pci_iov;
+
+HEADER {
+ struct nvlist;
+}
+
+
+#
+# Takes the device, a VF count and a read-only nvlist configuration, and
+# returns an int status.
+# NOTE(review): presumably invoked on the PF driver before its VFs are
+# created -- confirm against the PCI_IOV_INIT() callers.
+#
+METHOD int init {
+ device_t dev;
+ uint16_t num_vfs;
+ const struct nvlist *config;
+};
+
+#
+# Takes only the device and returns nothing.
+# NOTE(review): presumably the teardown counterpart of init -- confirm
+# against the PCI_IOV_UNINIT() callers.
+#
+METHOD void uninit {
+ device_t dev;
+};
+
+#
+# Takes the device, a VF number and a read-only nvlist configuration, and
+# returns an int status.
+# NOTE(review): presumably called once per VF with that VF's configuration
+# -- confirm against the PCI_IOV_ADD_VF() callers.
+#
+METHOD int add_vf {
+ device_t dev;
+ uint16_t vfnum;
+ const struct nvlist *config;
+};
Index: sys/dev/pci/pci_iov_private.h
===================================================================
--- /dev/null
+++ sys/dev/pci/pci_iov_private.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/dev/pci/pci_iov_private.h 279451 2015-03-01 00:40:42Z rstone $
+ */
+
+#ifndef _PCI_IOV_PRIVATE_H_
+#define _PCI_IOV_PRIVATE_H_
+
+/*
+ * Per-BAR SR-IOV state; struct pcicfg_iov keeps an array of these.
+ * NOTE(review): bar_size/bar_shift presumably describe the size of the BAR
+ * as a byte count and as a log2 shift -- confirm against pci_iov.c.
+ */
+struct pci_iov_bar {
+ struct resource *res;
+
+ pci_addr_t bar_size;
+ pci_addr_t bar_shift;
+};
+
+/*
+ * SR-IOV bookkeeping for a PCI device; other code in this change reaches it
+ * through dinfo->cfg.iov.
+ */
+struct pcicfg_iov {
+ struct cdev *iov_cdev;
+ nvlist_t *iov_schema;
+
+ /* One slot per BAR index in [0, PCIR_MAX_BAR_0]. */
+ struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1];
+ /* Resource manager that VF memory allocations are reserved from. */
+ struct rman rman;
+ /* NOTE(review): presumably backing storage for the rman's name. */
+ char rman_name[64];
+
+ /*
+  * NOTE(review): iov_pos looks like the config-space offset of the
+  * SR-IOV capability -- confirm.
+  */
+ int iov_pos;
+ int iov_num_vfs;
+ /* NOTE(review): presumably a mask of the IOV_* flags defined below. */
+ uint32_t iov_flags;
+};
+
+/*
+ * Distinct single-bit flags.
+ * NOTE(review): presumably stored in pcicfg_iov.iov_flags -- confirm.
+ */
+#define IOV_RMAN_INITED 0x0001
+#define IOV_BUSY 0x0002
+
+#endif
+
Index: sys/dev/pci/pci_iov_schema.c
===================================================================
--- /dev/null
+++ sys/dev/pci/pci_iov_schema.c
@@ -0,0 +1,869 @@
+/*-
+ * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov_schema.c 279465 2015-03-01 00:59:28Z rstone $");
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/ctype.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/iov.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+
+#include <machine/stdarg.h>
+
+#include <sys/dnv.h>
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
+
+#include <net/ethernet.h>
+
+#include <dev/pci/schema_private.h>
+
+struct config_type_validator;
+/*
+ * Checks the value stored under 'name' in a config nvlist against the given
+ * validator.  The implementations in this file return 0 or EINVAL.
+ */
+typedef int (validate_func)(const struct config_type_validator *,
+ const nvlist_t *, const char *name);
+/*
+ * Checks the DEFAULT_SCHEMA_NAME entry of a parameter schema nvlist against
+ * the given validator.  The implementations in this file return 0 or EINVAL.
+ */
+typedef int (default_validate_t)(const struct config_type_validator *,
+ const nvlist_t *);
+
+/* Per-type checkers for values found in a configuration. */
+static validate_func pci_iov_schema_validate_bool;
+static validate_func pci_iov_schema_validate_string;
+static validate_func pci_iov_schema_validate_uint;
+static validate_func pci_iov_schema_validate_unicast_mac;
+
+/* Per-type checkers for default values found in a schema. */
+static default_validate_t pci_iov_validate_bool_default;
+static default_validate_t pci_iov_validate_string_default;
+static default_validate_t pci_iov_validate_uint_default;
+static default_validate_t pci_iov_validate_unicast_mac_default;
+
+/*
+ * Describes one parameter type that a schema may use.
+ */
+struct config_type_validator {
+ /* Type string matched (with strcmp) against a schema's type entry. */
+ const char *type_name;
+ /* Validates a configured value of this type. */
+ validate_func *validate;
+ /* Validates a schema's default value of this type. */
+ default_validate_t *default_validate;
+ /* Largest accepted value; only read by the uint validators. */
+ uintmax_t limit;
+};
+
+/*
+ * Table of every parameter type understood by the schema code, searched by
+ * type_name in pci_iov_schema_find_validator().  The four unsigned integer
+ * types share the same callbacks and differ only in .limit.
+ */
+static struct config_type_validator pci_iov_schema_validators[] = {
+ {
+ .type_name = "bool",
+ .validate = pci_iov_schema_validate_bool,
+ .default_validate = pci_iov_validate_bool_default
+ },
+ {
+ .type_name = "string",
+ .validate = pci_iov_schema_validate_string,
+ .default_validate = pci_iov_validate_string_default
+ },
+ {
+ .type_name = "uint8_t",
+ .validate = pci_iov_schema_validate_uint,
+ .default_validate = pci_iov_validate_uint_default,
+ .limit = UINT8_MAX
+ },
+ {
+ .type_name = "uint16_t",
+ .validate = pci_iov_schema_validate_uint,
+ .default_validate = pci_iov_validate_uint_default,
+ .limit = UINT16_MAX
+ },
+ {
+ .type_name = "uint32_t",
+ .validate = pci_iov_schema_validate_uint,
+ .default_validate = pci_iov_validate_uint_default,
+ .limit = UINT32_MAX
+ },
+ {
+ .type_name = "uint64_t",
+ .validate = pci_iov_schema_validate_uint,
+ .default_validate = pci_iov_validate_uint_default,
+ .limit = UINT64_MAX
+ },
+ {
+ .type_name = "unicast-mac",
+ .validate = pci_iov_schema_validate_unicast_mac,
+ .default_validate = pci_iov_validate_unicast_mac_default,
+ },
+};
+
+/*
+ * Look up the validator whose type_name equals 'type' (exact, case-sensitive
+ * match).  Returns NULL when the type is not in the table.
+ */
+static const struct config_type_validator *
+pci_iov_schema_find_validator(const char *type)
+{
+	const struct config_type_validator *cur, *end;
+
+	end = pci_iov_schema_validators + nitems(pci_iov_schema_validators);
+	for (cur = pci_iov_schema_validators; cur != end; cur++) {
+		if (strcmp(type, cur->type_name) == 0)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Record the parameter's type in 'entry'.  A type string that has no
+ * validator marks the entry with EINVAL instead.
+ */
+static void
+pci_iov_schema_add_type(nvlist_t *entry, const char *type)
+{
+
+	if (pci_iov_schema_find_validator(type) != NULL)
+		nvlist_add_string(entry, "type", type);
+	else
+		nvlist_set_error(entry, EINVAL);
+}
+
+/*
+ * Mark 'entry' as required when IOV_SCHEMA_REQUIRED is set in flags.
+ * Asking for both REQUIRED and HASDEFAULT is a caller error and marks the
+ * entry with EINVAL.
+ */
+static void
+pci_iov_schema_add_required(nvlist_t *entry, uint32_t flags)
+{
+
+	if ((flags & IOV_SCHEMA_REQUIRED) == 0)
+		return;
+
+	if ((flags & IOV_SCHEMA_HASDEFAULT) != 0)
+		nvlist_set_error(entry, EINVAL);
+	else
+		nvlist_add_bool(entry, "required", 1);
+}
+
+/*
+ * Add a "bool" parameter called 'name' to 'schema'.  defaultVal is recorded
+ * only when IOV_SCHEMA_HASDEFAULT is set in flags.  Failure to allocate the
+ * parameter node is reported by setting ENOMEM on 'schema'.
+ */
+void
+pci_iov_schema_add_bool(nvlist_t *schema, const char *name, uint32_t flags,
+    int defaultVal)
+{
+	nvlist_t *param;
+
+	if ((param = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL) {
+		nvlist_set_error(schema, ENOMEM);
+		return;
+	}
+
+	pci_iov_schema_add_type(param, "bool");
+	if ((flags & IOV_SCHEMA_HASDEFAULT) != 0)
+		nvlist_add_bool(param, "default", defaultVal);
+	pci_iov_schema_add_required(param, flags);
+
+	/* The node is moved, not copied, into the schema. */
+	nvlist_move_nvlist(schema, name, param);
+}
+
+/*
+ * Add a "string" parameter called 'name' to 'schema'.  defaultVal is
+ * recorded only when IOV_SCHEMA_HASDEFAULT is set in flags.  Failure to
+ * allocate the parameter node is reported by setting ENOMEM on 'schema'.
+ */
+void
+pci_iov_schema_add_string(nvlist_t *schema, const char *name, uint32_t flags,
+    const char *defaultVal)
+{
+	nvlist_t *param;
+
+	if ((param = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL) {
+		nvlist_set_error(schema, ENOMEM);
+		return;
+	}
+
+	pci_iov_schema_add_type(param, "string");
+	if ((flags & IOV_SCHEMA_HASDEFAULT) != 0)
+		nvlist_add_string(param, "default", defaultVal);
+	pci_iov_schema_add_required(param, flags);
+
+	/* The node is moved, not copied, into the schema. */
+	nvlist_move_nvlist(schema, name, param);
+}
+
+/*
+ * Shared implementation of the pci_iov_schema_add_uintN() entry points.
+ * 'type' is the schema type string ("uint8_t" ... "uint64_t"); defaultVal is
+ * recorded only when IOV_SCHEMA_HASDEFAULT is set in flags.  Failure to
+ * allocate the parameter node is reported by setting ENOMEM on 'schema'.
+ */
+static void
+pci_iov_schema_int(nvlist_t *schema, const char *name, const char *type,
+    uint32_t flags, uint64_t defaultVal)
+{
+	nvlist_t *param;
+
+	if ((param = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL) {
+		nvlist_set_error(schema, ENOMEM);
+		return;
+	}
+
+	pci_iov_schema_add_type(param, type);
+	if ((flags & IOV_SCHEMA_HASDEFAULT) != 0)
+		nvlist_add_number(param, "default", defaultVal);
+	pci_iov_schema_add_required(param, flags);
+
+	/* The node is moved, not copied, into the schema. */
+	nvlist_move_nvlist(schema, name, param);
+}
+
+/*
+ * Add a "uint8_t" parameter called 'name' to 'schema'.  Thin wrapper around
+ * pci_iov_schema_int(), which takes the default as a uint64_t.
+ */
+void
+pci_iov_schema_add_uint8(nvlist_t *schema, const char *name, uint32_t flags,
+ uint8_t defaultVal)
+{
+
+ pci_iov_schema_int(schema, name, "uint8_t", flags, defaultVal);
+}
+
+/*
+ * Add a "uint16_t" parameter called 'name' to 'schema'.  Thin wrapper around
+ * pci_iov_schema_int(), which takes the default as a uint64_t.
+ */
+void
+pci_iov_schema_add_uint16(nvlist_t *schema, const char *name, uint32_t flags,
+ uint16_t defaultVal)
+{
+
+ pci_iov_schema_int(schema, name, "uint16_t", flags, defaultVal);
+}
+
+/*
+ * Add a "uint32_t" parameter called 'name' to 'schema'.  Thin wrapper around
+ * pci_iov_schema_int(), which takes the default as a uint64_t.
+ */
+void
+pci_iov_schema_add_uint32(nvlist_t *schema, const char *name, uint32_t flags,
+ uint32_t defaultVal)
+{
+
+ pci_iov_schema_int(schema, name, "uint32_t", flags, defaultVal);
+}
+
+/*
+ * Add a "uint64_t" parameter called 'name' to 'schema'.  Thin wrapper around
+ * pci_iov_schema_int().
+ */
+void
+pci_iov_schema_add_uint64(nvlist_t *schema, const char *name, uint32_t flags,
+ uint64_t defaultVal)
+{
+
+ pci_iov_schema_int(schema, name, "uint64_t", flags, defaultVal);
+}
+
+/*
+ * Add a "unicast-mac" parameter called 'name' to 'schema'.  When
+ * IOV_SCHEMA_HASDEFAULT is set, ETHER_ADDR_LEN bytes are copied from
+ * defaultVal as the default.  Failure to allocate the parameter node is
+ * reported by setting ENOMEM on 'schema'.
+ */
+void
+pci_iov_schema_add_unicast_mac(nvlist_t *schema, const char *name,
+    uint32_t flags, const uint8_t *defaultVal)
+{
+	nvlist_t *param;
+
+	if ((param = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL) {
+		nvlist_set_error(schema, ENOMEM);
+		return;
+	}
+
+	pci_iov_schema_add_type(param, "unicast-mac");
+	if ((flags & IOV_SCHEMA_HASDEFAULT) != 0)
+		nvlist_add_binary(param, "default", defaultVal,
+		    ETHER_ADDR_LEN);
+	pci_iov_schema_add_required(param, flags);
+
+	/* The node is moved, not copied, into the schema. */
+	nvlist_move_nvlist(schema, name, param);
+}
+
+/* A bool parameter is valid iff 'name' exists in config as a bool. */
+static int
+pci_iov_schema_validate_bool(const struct config_type_validator *validator,
+    const nvlist_t *config, const char *name)
+{
+
+	return (nvlist_exists_bool(config, name) ? 0 : EINVAL);
+}
+
+/* A string parameter is valid iff 'name' exists in config as a string. */
+static int
+pci_iov_schema_validate_string(const struct config_type_validator *validator,
+    const nvlist_t *config, const char *name)
+{
+
+	return (nvlist_exists_string(config, name) ? 0 : EINVAL);
+}
+
+/*
+ * An unsigned integer parameter is valid iff 'name' exists in config as a
+ * number that does not exceed the validator's limit.
+ */
+static int
+pci_iov_schema_validate_uint(const struct config_type_validator *validator,
+    const nvlist_t *config, const char *name)
+{
+
+	/* The lookup is only performed once existence is established. */
+	if (!nvlist_exists_number(config, name) ||
+	    nvlist_get_number(config, name) > validator->limit)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * A unicast-mac parameter is valid iff 'name' exists in config as a binary
+ * blob of exactly ETHER_ADDR_LEN bytes that is not a multicast address.
+ */
+static int
+pci_iov_schema_validate_unicast_mac(
+    const struct config_type_validator *validator,
+    const nvlist_t *config, const char *name)
+{
+	const uint8_t *addr;
+	size_t len;
+
+	if (!nvlist_exists_binary(config, name))
+		return (EINVAL);
+
+	addr = nvlist_get_binary(config, name, &len);
+	if (len != ETHER_ADDR_LEN || ETHER_IS_MULTICAST(addr))
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Copy the "default" entry of param_schema into config under 'name',
+ * preserving its nvlist type.  The types are probed in the order binary,
+ * bool, number, nvlist, string; anything else panics.
+ */
+static void
+pci_iov_config_add_default(const nvlist_t *param_schema, const char *name,
+    nvlist_t *config)
+{
+	const void *data;
+	size_t size;
+
+	if (nvlist_exists_binary(param_schema, "default")) {
+		data = nvlist_get_binary(param_schema, "default", &size);
+		nvlist_add_binary(config, name, data, size);
+		return;
+	}
+
+	if (nvlist_exists_bool(param_schema, "default")) {
+		nvlist_add_bool(config, name,
+		    nvlist_get_bool(param_schema, "default"));
+		return;
+	}
+
+	if (nvlist_exists_number(param_schema, "default")) {
+		nvlist_add_number(config, name,
+		    nvlist_get_number(param_schema, "default"));
+		return;
+	}
+
+	if (nvlist_exists_nvlist(param_schema, "default")) {
+		nvlist_add_nvlist(config, name,
+		    nvlist_get_nvlist(param_schema, "default"));
+		return;
+	}
+
+	if (nvlist_exists_string(param_schema, "default")) {
+		nvlist_add_string(config, name,
+		    nvlist_get_string(param_schema, "default"));
+		return;
+	}
+
+	panic("Unexpected nvlist type");
+}
+
+/* The default of a bool parameter must itself be stored as a bool. */
+static int
+pci_iov_validate_bool_default(const struct config_type_validator *validator,
+    const nvlist_t *param)
+{
+
+	return (nvlist_exists_bool(param, DEFAULT_SCHEMA_NAME) ? 0 : EINVAL);
+}
+
+/* The default of a string parameter must itself be stored as a string. */
+static int
+pci_iov_validate_string_default(const struct config_type_validator *validator,
+    const nvlist_t *param)
+{
+
+	return (nvlist_exists_string(param, DEFAULT_SCHEMA_NAME) ? 0 : EINVAL);
+}
+
+/*
+ * The default of an unsigned integer parameter must be stored as a number
+ * that does not exceed the validator's limit.
+ */
+static int
+pci_iov_validate_uint_default(const struct config_type_validator *validator,
+    const nvlist_t *param)
+{
+
+	/* The lookup is only performed once existence is established. */
+	if (!nvlist_exists_number(param, DEFAULT_SCHEMA_NAME) ||
+	    nvlist_get_number(param, DEFAULT_SCHEMA_NAME) > validator->limit)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * The default of a unicast-mac parameter must be a binary blob of exactly
+ * ETHER_ADDR_LEN bytes that is not a multicast address.
+ */
+static int
+pci_iov_validate_unicast_mac_default(
+    const struct config_type_validator *validator, const nvlist_t *param)
+{
+	const uint8_t *addr;
+	size_t len;
+
+	if (!nvlist_exists_binary(param, DEFAULT_SCHEMA_NAME))
+		return (EINVAL);
+
+	addr = nvlist_get_binary(param, DEFAULT_SCHEMA_NAME, &len);
+	if (len != ETHER_ADDR_LEN || ETHER_IS_MULTICAST(addr))
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Validate the schema node of a single parameter:
+ *  - it must carry a string type that names a known validator;
+ *  - a default, if present, must satisfy that validator and excludes the
+ *    presence of a "required" entry;
+ *  - a "required" entry, if present, must be a bool.
+ * Returns 0 on success or an errno value.
+ */
+static int
+pci_iov_validate_param_schema(const nvlist_t *schema)
+{
+	const struct config_type_validator *v;
+	int rc;
+
+	/* All parameters must define a type. */
+	if (!nvlist_exists_string(schema, TYPE_SCHEMA_NAME))
+		return (EINVAL);
+
+	v = pci_iov_schema_find_validator(
+	    nvlist_get_string(schema, TYPE_SCHEMA_NAME));
+	if (v == NULL)
+		return (EINVAL);
+
+	if (nvlist_exists(schema, DEFAULT_SCHEMA_NAME)) {
+		/* Validate that the default value conforms to the type. */
+		rc = v->default_validate(v, schema);
+		if (rc != 0)
+			return (rc);
+
+		/* Required and Default are mutually exclusive. */
+		if (nvlist_exists(schema, REQUIRED_SCHEMA_NAME))
+			return (EINVAL);
+
+		/* No "required" entry is left to check. */
+		return (0);
+	}
+
+	/* The "Required" field must be a bool. */
+	if (nvlist_exists(schema, REQUIRED_SCHEMA_NAME) &&
+	    !nvlist_exists_bool(schema, REQUIRED_SCHEMA_NAME))
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Validate the subsystem node called 'name' under dev_schema.  The node must
+ * exist as an nvlist, and each of its children must be an nvlist that passes
+ * pci_iov_validate_param_schema().
+ */
+static int
+pci_iov_validate_subsystem_schema(const nvlist_t *dev_schema, const char *name)
+{
+	const nvlist_t *subsys;
+	const char *key;
+	void *cookie;
+	int kind, rc;
+
+	if (!nvlist_exists_nvlist(dev_schema, name))
+		return (EINVAL);
+	subsys = nvlist_get_nvlist(dev_schema, name);
+
+	for (cookie = NULL;
+	    (key = nvlist_next(subsys, &kind, &cookie)) != NULL; ) {
+		if (kind != NV_TYPE_NVLIST)
+			return (EINVAL);
+
+		rc = pci_iov_validate_param_schema(
+		    nvlist_get_nvlist(subsys, key));
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Reject a device schema in which the driver subsystem defines a parameter
+ * whose name is already used by the iov subsystem.  Both subsystem nodes
+ * must already be known to exist.
+ */
+static int
+pci_iov_validate_param_collisions(const nvlist_t *dev_schema)
+{
+	const nvlist_t *drv, *iov;
+	const char *key;
+	void *cookie;
+	int kind;
+
+	drv = nvlist_get_nvlist(dev_schema, DRIVER_CONFIG_NAME);
+	iov = nvlist_get_nvlist(dev_schema, IOV_CONFIG_NAME);
+
+	for (cookie = NULL;
+	    (key = nvlist_next(drv, &kind, &cookie)) != NULL; ) {
+		if (nvlist_exists(iov, key))
+			return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Reject a device schema node that has any child other than the IOV and
+ * DRIVER subsystems.  Names are compared case-sensitively.
+ */
+static int
+pci_iov_validate_schema_subsystems(const nvlist_t *dev_schema)
+{
+	const char *key;
+	void *cookie;
+	int kind;
+
+	for (cookie = NULL;
+	    (key = nvlist_next(dev_schema, &kind, &cookie)) != NULL; ) {
+		if (strcmp(key, IOV_CONFIG_NAME) == 0)
+			continue;
+		if (strcmp(key, DRIVER_CONFIG_NAME) == 0)
+			continue;
+
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Validate the device node called 'name' in the top-level schema.  It must
+ * be an nvlist with valid IOV and DRIVER subsystems, no parameter name
+ * shared between the two, and no other children.  The checks run in that
+ * order and the first failure is returned.
+ */
+static int
+pci_iov_validate_device_schema(const nvlist_t *schema, const char *name)
+{
+	const nvlist_t *dev;
+	int rc;
+
+	if (!nvlist_exists_nvlist(schema, name))
+		return (EINVAL);
+	dev = nvlist_get_nvlist(schema, name);
+
+	rc = pci_iov_validate_subsystem_schema(dev, IOV_CONFIG_NAME);
+	if (rc == 0)
+		rc = pci_iov_validate_subsystem_schema(dev,
+		    DRIVER_CONFIG_NAME);
+	if (rc == 0)
+		rc = pci_iov_validate_param_collisions(dev);
+	if (rc == 0)
+		rc = pci_iov_validate_schema_subsystems(dev);
+
+	return (rc);
+}
+
+/*
+ * Reject a top-level schema that has any child other than the PF and VF
+ * device nodes.  Names are compared case-sensitively.
+ */
+static int
+pci_iov_validate_schema_devices(const nvlist_t *dev_schema)
+{
+	const char *key;
+	void *cookie;
+	int kind;
+
+	for (cookie = NULL;
+	    (key = nvlist_next(dev_schema, &kind, &cookie)) != NULL; ) {
+		if (strcmp(key, PF_CONFIG_NAME) == 0)
+			continue;
+		if (strcmp(key, VF_SCHEMA_NAME) == 0)
+			continue;
+
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Validate a complete schema: the PF device node, then the VF device node,
+ * then the absence of any other top-level node.  Returns 0 or the errno
+ * value of the first check that fails.
+ */
+int
+pci_iov_validate_schema(const nvlist_t *schema)
+{
+	int rc;
+
+	rc = pci_iov_validate_device_schema(schema, PF_CONFIG_NAME);
+	if (rc == 0)
+		rc = pci_iov_validate_device_schema(schema, VF_SCHEMA_NAME);
+	if (rc == 0)
+		rc = pci_iov_validate_schema_devices(schema);
+
+	return (rc);
+}
+
+/*
+ * Walk every parameter in the schema.  A parameter that config omits is an
+ * error if the schema marks it required, and is filled in from the schema's
+ * default if it has one.  Returns EINVAL for a missing required parameter,
+ * otherwise the error state of config after any defaults were added.
+ */
+static int
+pci_iov_schema_validate_required(const nvlist_t *schema, nvlist_t *config)
+{
+	const nvlist_t *param;
+	const char *key;
+	void *it;
+	int kind;
+
+	for (it = NULL; (key = nvlist_next(schema, &kind, &it)) != NULL; ) {
+		param = nvlist_get_nvlist(schema, key);
+
+		/* Nothing to enforce or fill in when the value is present. */
+		if (nvlist_exists(config, key))
+			continue;
+
+		if (dnvlist_get_bool(param, "required", 0))
+			return (EINVAL);
+
+		if (nvlist_exists(param, "default"))
+			pci_iov_config_add_default(param, key, config);
+	}
+
+	return (nvlist_error(config));
+}
+
+/*
+ * Validate the value stored under 'name' in config with the validator named
+ * by the schema parameter's "type" entry.  The schema is expected to have
+ * been validated already, so an unknown type trips the KASSERT.
+ */
+static int
+pci_iov_schema_validate_param(const nvlist_t *schema_param, const char *name,
+    const nvlist_t *config)
+{
+	const struct config_type_validator *v;
+	const char *type_name;
+
+	type_name = nvlist_get_string(schema_param, "type");
+	v = pci_iov_schema_find_validator(type_name);
+
+	KASSERT(v != NULL,
+	    ("Schema was not validated: Unknown type %s", type_name));
+
+	return (v->validate(v, config, name));
+}
+
+/*
+ * Check every parameter present in config: it must be defined in the schema
+ * (as an nvlist node) and its value must satisfy the schema's type.
+ * Returns 0 or the errno value of the first offending parameter.
+ */
+static int
+pci_iov_schema_validate_types(const nvlist_t *schema, const nvlist_t *config)
+{
+	const char *key;
+	void *it;
+	int kind, rc;
+
+	for (it = NULL; (key = nvlist_next(config, &kind, &it)) != NULL; ) {
+		if (!nvlist_exists_nvlist(schema, key))
+			return (EINVAL);
+
+		rc = pci_iov_schema_validate_param(
+		    nvlist_get_nvlist(schema, key), key, config);
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Validate one device node of config (config_device) against one device
+ * node of the schema (schema_device).
+ *
+ * The device node and its IOV and DRIVER sub-nodes are taken out of config
+ * so that they can be modified, checked for required parameters (which may
+ * add defaults) and then for types, and finally moved back into place on
+ * every path, including the error paths.  Returns 0 on success, EINVAL when
+ * a node is missing, or the error of the first failing check.
+ */
+static int
+pci_iov_schema_validate_device(const nvlist_t *schema, nvlist_t *config,
+ const char *schema_device, const char *config_device)
+{
+ const nvlist_t *device_schema, *iov_schema, *driver_schema;
+ nvlist_t *device_config, *iov_config, *driver_config;
+ int error;
+
+ /* Start from NULL so the cleanup at "out" can run from any point. */
+ device_config = NULL;
+ iov_config = NULL;
+ driver_config = NULL;
+
+ device_schema = nvlist_get_nvlist(schema, schema_device);
+ iov_schema = nvlist_get_nvlist(device_schema, IOV_CONFIG_NAME);
+ driver_schema = nvlist_get_nvlist(device_schema, DRIVER_CONFIG_NAME);
+
+ /* Detach the nodes from config; NULL means the node is missing. */
+ device_config = dnvlist_take_nvlist(config, config_device, NULL);
+ if (device_config == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+
+ iov_config = dnvlist_take_nvlist(device_config, IOV_CONFIG_NAME, NULL);
+ if (iov_config == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+
+ driver_config = dnvlist_take_nvlist(device_config, DRIVER_CONFIG_NAME,
+ NULL);
+ if (driver_config == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* Required/default handling runs first so that defaults get type-checked. */
+ error = pci_iov_schema_validate_required(iov_schema, iov_config);
+ if (error != 0)
+ goto out;
+
+ error = pci_iov_schema_validate_required(driver_schema, driver_config);
+ if (error != 0)
+ goto out;
+
+ error = pci_iov_schema_validate_types(iov_schema, iov_config);
+ if (error != 0)
+ goto out;
+
+ error = pci_iov_schema_validate_types(driver_schema, driver_config);
+ if (error != 0)
+ goto out;
+
+out:
+ /* Note that these functions handle NULL pointers safely. */
+ /* Reattach innermost first: sub-nodes into the device node, then it into config. */
+ nvlist_move_nvlist(device_config, IOV_CONFIG_NAME, iov_config);
+ nvlist_move_nvlist(device_config, DRIVER_CONFIG_NAME, driver_config);
+ nvlist_move_nvlist(config, config_device, device_config);
+
+ return (error);
+}
+
+/*
+ * Validate the config node of every VF in [0, num_vfs) against the VF
+ * schema.  Node names are built as VF_PREFIX followed by the decimal VF
+ * number.  Stops at, and returns, the first error.
+ */
+static int
+pci_iov_schema_validate_vfs(const nvlist_t *schema, nvlist_t *config,
+    uint16_t num_vfs)
+{
+	char vf_name[VF_MAX_NAME];
+	int rc, vf;
+
+	for (vf = 0; vf < num_vfs; vf++) {
+		snprintf(vf_name, sizeof(vf_name), VF_PREFIX"%d", vf);
+
+		rc = pci_iov_schema_validate_device(schema, config,
+		    VF_SCHEMA_NAME, vf_name);
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Reject a device config node that has any child other than the IOV and
+ * DRIVER subsystems.  Names are compared case-insensitively.
+ */
+static int
+pci_iov_schema_validate_device_subsystems(const nvlist_t *config)
+{
+	const char *key;
+	void *it;
+	int kind;
+
+	for (it = NULL; (key = nvlist_next(config, &kind, &it)) != NULL; ) {
+		if (strcasecmp(key, IOV_CONFIG_NAME) != 0 &&
+		    strcasecmp(key, DRIVER_CONFIG_NAME) != 0)
+			return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Check that 'name' is a legal device node name: either the PF name or
+ * VF_PREFIX followed by a canonical decimal number n with n < num_vfs.
+ * "Canonical" means digits only, no sign or whitespace, and no leading
+ * zeros.  Matching of the PF name and VF prefix is case-insensitive.
+ * Returns 0 when the name is acceptable and EINVAL otherwise.
+ */
+static int
+pci_iov_schema_validate_dev_name(const char *name, uint16_t num_vfs)
+{
+	const char *digits;
+	char *stop;
+	u_long n;
+
+	if (strcasecmp(PF_CONFIG_NAME, name) == 0)
+		return (0);
+
+	/* Anything else has to start with the VF prefix. */
+	if (strncasecmp(name, VF_PREFIX, VF_PREFIX_LEN) != 0)
+		return (EINVAL);
+
+	digits = name + VF_PREFIX_LEN;
+
+	/*
+	 * Reject a bare prefix with no number, as well as the leading
+	 * whitespace or sign that strtoul() would otherwise accept.
+	 */
+	if (digits[0] == '\0' || !isdigit(digits[0]))
+		return (EINVAL);
+
+	/* The whole remainder must be consumed as a decimal number. */
+	n = strtoul(digits, &stop, 10);
+	if (*stop != '\0')
+		return (EINVAL);
+
+	/* No leading zeros: neither "VF-01" nor "VF-00" is canonical. */
+	if ((n != 0 && digits[0] == '0') || (n == 0 && digits[1] != '\0'))
+		return (EINVAL);
+
+	return (n >= num_vfs ? EINVAL : 0);
+}
+
+/*
+ * Check every top-level node of config: its name must be the PF name or a
+ * valid VF name below num_vfs, and it may only contain the IOV and DRIVER
+ * subsystems.  Returns 0 or the first error.
+ */
+static int
+pci_iov_schema_validate_device_names(const nvlist_t *config, uint16_t num_vfs)
+{
+	const char *key;
+	void *it;
+	int kind, rc;
+
+	for (it = NULL; (key = nvlist_next(config, &kind, &it)) != NULL; ) {
+		rc = pci_iov_schema_validate_dev_name(key, num_vfs);
+		if (rc != 0)
+			return (rc);
+
+		/*
+		 * The name is that of a PF/VF node, and
+		 * pci_iov_schema_validate_device() has already established
+		 * that those nodes are nvlists, so the lookup is safe.
+		 */
+		rc = pci_iov_schema_validate_device_subsystems(
+		    nvlist_get_nvlist(config, key));
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Validate a configuration against a schema.  The PF node is validated
+ * first because it supplies num_vfs; the VF nodes follow, and finally the
+ * set of node names is checked.  Returns 0 or the first error.  config may
+ * be modified: defaults from the schema are added for omitted parameters.
+ */
+int
+pci_iov_schema_validate_config(const nvlist_t *schema, nvlist_t *config)
+{
+	uint16_t num_vfs;
+	int rc;
+
+	rc = pci_iov_schema_validate_device(schema, config, PF_CONFIG_NAME,
+	    PF_CONFIG_NAME);
+	if (rc != 0)
+		return (rc);
+
+	/* Only safe to read once the PF node has been validated. */
+	num_vfs = pci_iov_config_get_num_vfs(config);
+
+	rc = pci_iov_schema_validate_vfs(schema, config, num_vfs);
+	if (rc == 0)
+		rc = pci_iov_schema_validate_device_names(config, num_vfs);
+
+	return (rc);
+}
+
+/*
+ * Fetch the "num_vfs" number from the IOV subsystem of the PF node.  The
+ * lookups are unchecked, so config must already have been validated.
+ */
+uint16_t
+pci_iov_config_get_num_vfs(const nvlist_t *config)
+{
+	const nvlist_t *pf_iov;
+
+	pf_iov = nvlist_get_nvlist(nvlist_get_nvlist(config, PF_CONFIG_NAME),
+	    IOV_CONFIG_NAME);
+
+	return (nvlist_get_number(pf_iov, "num_vfs"));
+}
+
+/*
+ * Allocate a new empty schema node.  The node is created with
+ * NV_FLAG_IGNORE_CASE, the same flag used for the parameter nodes built by
+ * the pci_iov_schema_add_*() functions.  Returns whatever nvlist_create()
+ * returns, which those functions treat as NULL on allocation failure.
+ */
+nvlist_t *
+pci_iov_schema_alloc_node(void)
+{
+
+ return (nvlist_create(NV_FLAG_IGNORE_CASE));
+}
Index: sys/dev/pci/pci_pci.c
===================================================================
--- sys/dev/pci/pci_pci.c
+++ sys/dev/pci/pci_pci.c
@@ -57,13 +57,16 @@
static int pcib_power_for_sleep(device_t pcib, device_t dev,
int *pstate);
static uint16_t pcib_ari_get_rid(device_t pcib, device_t dev);
-static uint32_t pcib_read_config(device_t dev, u_int b, u_int s,
+static uint32_t pcib_read_config(device_t dev, u_int b, u_int s,
u_int f, u_int reg, int width);
static void pcib_write_config(device_t dev, u_int b, u_int s,
u_int f, u_int reg, uint32_t val, int width);
static int pcib_ari_maxslots(device_t dev);
static int pcib_ari_maxfuncs(device_t dev);
static int pcib_try_enable_ari(device_t pcib, device_t dev);
+static int pcib_ari_enabled(device_t pcib);
+static void pcib_ari_decode_rid(device_t pcib, uint16_t rid,
+ int *bus, int *slot, int *func);
static device_method_t pcib_methods[] = {
/* Device interface */
@@ -104,6 +107,8 @@
DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep),
DEVMETHOD(pcib_get_rid, pcib_ari_get_rid),
DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari),
+ DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled),
+ DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid),
DEVMETHOD_END
};
@@ -1867,6 +1872,24 @@
return (PCI_FUNCMAX);
}
+/*
+ * Split a RID into bus/slot/function.  The bus is always decoded the same
+ * way; the slot and function use the ARI layout when this bridge has
+ * PCIB_ENABLE_ARI set and the conventional layout otherwise.
+ */
+static void
+pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot,
+    int *func)
+{
+	struct pcib_softc *sc;
+	int ari;
+
+	sc = device_get_softc(pcib);
+	ari = (sc->flags & PCIB_ENABLE_ARI) != 0;
+
+	*bus = PCI_RID2BUS(rid);
+	*slot = ari ? PCIE_ARI_RID2SLOT(rid) : PCI_RID2SLOT(rid);
+	*func = ari ? PCIE_ARI_RID2FUNC(rid) : PCI_RID2FUNC(rid);
+}
+
/*
* Since we are a child of a PCI bus, its parent must support the pcib interface.
*/
@@ -1998,6 +2021,16 @@
return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate));
}
+/* Return 1 if this bridge has PCIB_ENABLE_ARI set in its flags, else 0. */
+static int
+pcib_ari_enabled(device_t pcib)
+{
+	struct pcib_softc *sc;
+
+	sc = device_get_softc(pcib);
+	if ((sc->flags & PCIB_ENABLE_ARI) == 0)
+		return (0);
+
+	return (1);
+}
+
static uint16_t
pcib_ari_get_rid(device_t pcib, device_t dev)
{
Index: sys/dev/pci/pci_private.h
===================================================================
--- sys/dev/pci/pci_private.h
+++ sys/dev/pci/pci_private.h
@@ -51,6 +51,8 @@
void pci_add_children(device_t dev, int domain, int busno,
size_t dinfo_size);
void pci_add_child(device_t bus, struct pci_devinfo *dinfo);
+device_t pci_add_iov_child(device_t bus, device_t pf, size_t dinfo_size,
+ uint16_t rid, uint16_t vid, uint16_t did);
void pci_add_resources(device_t bus, device_t dev, int force,
uint32_t prefetchmask);
int pci_attach_common(device_t dev);
@@ -133,4 +135,26 @@
*/
void pci_cfg_save(device_t, struct pci_devinfo *, int);
+int pci_mapsize(uint64_t testval);
+void pci_read_bar(device_t dev, int reg, pci_addr_t *mapp,
+ pci_addr_t *testvalp, int *bar64);
+struct pci_map *pci_add_bar(device_t dev, int reg, pci_addr_t value,
+ pci_addr_t size);
+
+struct resource *pci_alloc_multi_resource(device_t dev, device_t child,
+ int type, int *rid, u_long start, u_long end, u_long count,
+ u_long num, u_int flags);
+
+int pci_iov_attach_method(device_t bus, device_t dev,
+ struct nvlist *pf_schema, struct nvlist *vf_schema);
+int pci_iov_detach_method(device_t bus, device_t dev);
+
+device_t pci_create_iov_child_method(device_t bus, device_t pf,
+ uint16_t rid, uint16_t vid, uint16_t did);
+
+struct resource *pci_vf_alloc_mem_resource(device_t dev, device_t child,
+ int *rid, u_long start, u_long end, u_long count,
+ u_int flags);
+int pci_vf_release_mem_resource(device_t dev, device_t child,
+ int rid, struct resource *r);
#endif /* _PCI_PRIVATE_H_ */
Index: sys/dev/pci/pci_user.c
===================================================================
--- sys/dev/pci/pci_user.c
+++ sys/dev/pci/pci_user.c
@@ -492,7 +492,7 @@
static int
pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
- device_t pcidev, brdev;
+ device_t pcidev;
void *confdata;
const char *name;
struct devlist *devlist_head;
@@ -922,37 +922,25 @@
io->pi_sel.pc_bus, io->pi_sel.pc_dev,
io->pi_sel.pc_func);
if (pcidev) {
- brdev = device_get_parent(
- device_get_parent(pcidev));
-
#ifdef PRE7_COMPAT
if (cmd == PCIOCWRITE || cmd == PCIOCWRITE_OLD)
#else
if (cmd == PCIOCWRITE)
#endif
- PCIB_WRITE_CONFIG(brdev,
- io->pi_sel.pc_bus,
- io->pi_sel.pc_dev,
- io->pi_sel.pc_func,
+ pci_write_config(pcidev,
io->pi_reg,
io->pi_data,
io->pi_width);
#ifdef PRE7_COMPAT
else if (cmd == PCIOCREAD_OLD)
io_old->pi_data =
- PCIB_READ_CONFIG(brdev,
- io->pi_sel.pc_bus,
- io->pi_sel.pc_dev,
- io->pi_sel.pc_func,
+ pci_read_config(pcidev,
io->pi_reg,
io->pi_width);
#endif
else
io->pi_data =
- PCIB_READ_CONFIG(brdev,
- io->pi_sel.pc_bus,
- io->pi_sel.pc_dev,
- io->pi_sel.pc_func,
+ pci_read_config(pcidev,
io->pi_reg,
io->pi_width);
error = 0;
Index: sys/dev/pci/pcib_if.m
===================================================================
--- sys/dev/pci/pcib_if.m
+++ sys/dev/pci/pcib_if.m
@@ -23,7 +23,7 @@
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
-# $FreeBSD$
+# $FreeBSD: head/sys/dev/pci/pcib_if.m 289494 2015-10-18 08:13:51Z jmg $
#
#include <sys/bus.h>
@@ -39,6 +39,13 @@
{
return (PCI_INVALID_IRQ);
}
+
+ /* Default for the ari_enabled method: always reports 0 (ARI disabled). */
+ static int
+ pcib_null_ari_enabled(device_t pcib)
+ {
+
+ return (0);
+ }
};
#
@@ -90,7 +97,7 @@
};
#
-# Route an interrupt. Returns a value suitable for stuffing into
+# Route an interrupt. Returns a value suitable for stuffing into
# a device's interrupt register.
#
METHOD int route_interrupt {
@@ -182,3 +189,20 @@
device_t dev;
};
+#
+# Return non-zero if PCI ARI is enabled, or zero otherwise.  Bridges that do
+# not implement this method get pcib_null_ari_enabled, which returns zero.
+#
+METHOD int ari_enabled {
+ device_t pcib;
+} DEFAULT pcib_null_ari_enabled;
+
+#
+# Decode a PCI Routing Identifier (RID) into PCI bus/slot/function.  The
+# results are written through the bus, slot and func pointers.  Bridges that
+# do not implement this method get pcib_decode_rid.
+#
+METHOD void decode_rid {
+ device_t pcib;
+ uint16_t rid;
+ int *bus;
+ int *slot;
+ int *func;
+} DEFAULT pcib_decode_rid;
Index: sys/dev/pci/pcib_private.h
===================================================================
--- sys/dev/pci/pcib_private.h
+++ sys/dev/pci/pcib_private.h
@@ -170,5 +170,7 @@
int pcib_release_msix(device_t pcib, device_t dev, int irq);
int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data);
uint16_t pcib_get_rid(device_t pcib, device_t dev);
+void pcib_decode_rid(device_t pcib, uint16_t rid, int *bus,
+ int *slot, int *func);
#endif
Index: sys/dev/pci/pcib_support.c
===================================================================
--- sys/dev/pci/pcib_support.c
+++ sys/dev/pci/pcib_support.c
@@ -66,3 +66,13 @@
return (PCI_RID(bus, slot, func));
}
+void
+pcib_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot,
+ int *func)
+{
+
+ *bus = PCI_RID2BUS(rid);
+ *slot = PCI_RID2SLOT(rid);
+ *func = PCI_RID2FUNC(rid);
+}
+
Index: sys/dev/pci/pcireg.h
===================================================================
--- sys/dev/pci/pcireg.h
+++ sys/dev/pci/pcireg.h
@@ -68,6 +68,10 @@
#define PCI_RID2SLOT(rid) (((rid) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)
#define PCI_RID2FUNC(rid) (((rid) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)
+#define PCIE_ARI_RID2SLOT(rid) (0)
+#define PCIE_ARI_RID2FUNC(rid) \
+ (((rid) >> PCI_RID_FUNC_SHIFT) & PCIE_ARI_FUNCMAX)
+
#define PCIE_ARI_SLOT(func) (((func) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX)
#define PCIE_ARI_FUNC(func) (((func) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX)
@@ -920,3 +924,21 @@
#define PCIR_SERIAL_LOW 0x04
#define PCIR_SERIAL_HIGH 0x08
+/* SR-IOV definitions */
+#define PCIR_SRIOV_CTL 0x08
+#define PCIM_SRIOV_VF_EN 0x01
+#define PCIM_SRIOV_VF_MSE 0x08 /* Memory space enable. */
+#define PCIM_SRIOV_ARI_EN 0x10
+#define PCIR_SRIOV_TOTAL_VFS 0x0E
+#define PCIR_SRIOV_NUM_VFS 0x10
+#define PCIR_SRIOV_VF_OFF 0x14
+#define PCIR_SRIOV_VF_STRIDE 0x16
+#define PCIR_SRIOV_VF_DID 0x1A
+#define PCIR_SRIOV_PAGE_CAP 0x1C
+#define PCIR_SRIOV_PAGE_SIZE 0x20
+
+#define PCI_SRIOV_BASE_PAGE_SHIFT 12
+
+#define PCIR_SRIOV_BARS 0x24
+#define PCIR_SRIOV_BAR(x) (PCIR_SRIOV_BARS + (x) * 4)
+
Index: sys/dev/pci/pcivar.h
===================================================================
--- sys/dev/pci/pcivar.h
+++ sys/dev/pci/pcivar.h
@@ -143,6 +143,12 @@
uint8_t pcix_location; /* Offset of PCI-X capability registers. */
};
+struct pcicfg_vf {
+ int index;
+};
+
+#define PCICFG_VF 0x0001 /* Device is an SR-IOV Virtual Function */
+
/* config header information common to all header types */
typedef struct pcicfg {
struct device *dev; /* device which owns this */
@@ -179,6 +185,9 @@
uint8_t slot; /* config space slot address */
uint8_t func; /* config space function number */
+ uint32_t flags; /* flags defined above */
+ size_t devinfo_size; /* Size of devinfo for this bus type. */
+
struct pcicfg_pp pp; /* Power management */
struct pcicfg_vpd vpd; /* Vital product data */
struct pcicfg_msi msi; /* PCI MSI */
@@ -186,6 +195,8 @@
struct pcicfg_ht ht; /* HyperTransport */
struct pcicfg_pcie pcie; /* PCI Express */
struct pcicfg_pcix pcix; /* PCI-X */
+ struct pcicfg_iov *iov; /* SR-IOV */
+ struct pcicfg_vf vf; /* SR-IOV Virtual Function */
} pcicfgregs;
/* additional type 1 device config header information (PCI to PCI bridge) */
Index: sys/dev/pci/schema_private.h
===================================================================
--- /dev/null
+++ sys/dev/pci/schema_private.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2014 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SCHEMA_PRIVATE_H_
+#define _SCHEMA_PRIVATE_H_
+
+int pci_iov_validate_schema(const nvlist_t *schema);
+
+int pci_iov_schema_validate_config(const nvlist_t *, nvlist_t *);
+uint16_t pci_iov_config_get_num_vfs(const nvlist_t *);
+
+#endif
Index: sys/i386/conf/GENERIC
===================================================================
--- sys/i386/conf/GENERIC
+++ sys/i386/conf/GENERIC
@@ -91,6 +91,7 @@
device acpi
device eisa
device pci
+device PCI_IOV # PCI SR-IOV support
# Floppy drives
device fdc
Index: sys/kern/subr_bus.c
===================================================================
--- sys/kern/subr_bus.c
+++ sys/kern/subr_bus.c
@@ -2679,6 +2679,25 @@
}
/**
+ * @brief Set the devclass of a device and mark the devclass fixed.
+ * @see device_set_devclass()
+ */
+int
+device_set_devclass_fixed(device_t dev, const char *classname)
+{
+ int error;
+
+ if (classname == NULL)
+ return (EINVAL);
+
+ error = device_set_devclass(dev, classname);
+ if (error)
+ return (error);
+ dev->flags |= DF_FIXEDCLASS;
+ return (0);
+}
+
+/**
* @brief Set the driver of a device
*
* @retval 0 success
Index: sys/sys/bus.h
===================================================================
--- sys/sys/bus.h
+++ sys/sys/bus.h
@@ -465,6 +465,7 @@
void device_set_desc(device_t dev, const char* desc);
void device_set_desc_copy(device_t dev, const char* desc);
int device_set_devclass(device_t dev, const char *classname);
+int device_set_devclass_fixed(device_t dev, const char *classname);
int device_set_driver(device_t dev, driver_t *driver);
void device_set_flags(device_t dev, u_int32_t flags);
void device_set_softc(device_t dev, void *softc);
Index: sys/sys/iov.h
===================================================================
--- /dev/null
+++ sys/sys/iov.h
@@ -0,0 +1,257 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/sys/iov.h 279453 2015-03-01 00:40:57Z rstone $
+ */
+
+#ifndef _SYS_IOV_H_
+#define _SYS_IOV_H_
+
+#include <sys/ioccom.h>
+
+#define PF_CONFIG_NAME "PF"
+#define VF_SCHEMA_NAME "VF"
+
+#define VF_PREFIX "VF-"
+#define VF_PREFIX_LEN 3
+#define VF_NUM_LEN 5 /* The maximum VF num is 65535. */
+#define VF_MAX_NAME (VF_PREFIX_LEN + VF_NUM_LEN + 1)
+
+#define DRIVER_CONFIG_NAME "DRIVER"
+#define IOV_CONFIG_NAME "IOV"
+
+#define TYPE_SCHEMA_NAME "TYPE"
+#define DEFAULT_SCHEMA_NAME "DEFAULT"
+#define REQUIRED_SCHEMA_NAME "REQUIRED"
+
+/*
+ * Because each PF device is expected to expose a unique set of possible
+ * configurations, the SR-IOV infrastructure dynamically queries the PF
+ * driver for its capabilities. These capabilities are exposed to userland
+ * with a configuration schema. The schema is exported from the kernel as a
+ * packed nvlist. See nv(3) for the details of the nvlist API. The expected
+ * format of the nvlist is:
+ *
+ * BASIC RULES
+ * 1) All keys are case-insensitive.
+ * 2) No keys that are not specified below may exist at any level of the
+ * schema.
+ * 3) All keys are mandatory unless explicitly documented as optional. If a
+ * key is mandatory then the associated value is also mandatory.
+ * 4) Order of keys is irrelevant.
+ *
+ * TOP LEVEL
+ * 1) There must be a top-level key with the name PF_CONFIG_NAME. The value
+ * associated with this key is a nvlist that follows the device schema
+ * node format. The parameters in this node specify the configuration
+ * parameters that may be applied to a PF.
+ * 2) There must be a top-level key with the name VF_SCHEMA_NAME. The value
+ * associated with this key is a nvlist that follows the device schema
+ * node format. The parameters in this node specify the configuration
+ * parameters that may be applied to a VF.
+ *
+ * DEVICE SCHEMA NODE
+ * 1) There must be a key with the name DRIVER_CONFIG_NAME. The value
+ * associated with this key is a nvlist that follows the device/subsystem
+ * schema node format. The parameters in this node specify the
+ * configuration parameters that are specific to a particular device
+ * driver.
+ * 2) There must be a key with the name IOV_CONFIG_NAME. The value associated
+ * with this key is an nvlist that follows the device/subsystem schema node
+ * format. The parameters in this node specify the configuration
+ * parameters that are applied by the SR-IOV infrastructure.
+ *
+ * DEVICE/SUBSYSTEM SCHEMA NODE
+ * 1) All keys in the device/subsystem schema node are optional.
+ * 2) Each key specifies the name of a valid configuration parameter that may
+ * be applied to the device/subsystem combination specified by this node.
+ * The value associated with the key specifies the format of valid
+ * configuration values, and must be a nvlist in parameter schema node
+ * format.
+ *
+ * PARAMETER SCHEMA NODE
+ * 1) The parameter schema node must contain a key with the name
+ * TYPE_SCHEMA_NAME. The value associated with this key must be a string.
+ * This string specifies the type of value that the parameter specified by
+ * this node must take. The string must have one of the following values:
+ * - "bool" - The configuration value must be a boolean.
+ * - "mac-addr" - The configuration value must be a binary value. In
+ * addition, the value must be exactly 6 bytes long and
+ * the value must not be a multicast or broadcast mac.
+ * - "uint8_t" - The configuration value must be an integer value in
+ * the range [0, UINT8_MAX].
+ * - "uint16_t" - The configuration value must be an integer value in
+ * the range [0, UINT16_MAX].
+ * - "uint32_t" - The configuration value must be an integer value in
+ * the range [0, UINT32_MAX].
+ * - "uint64_t" - The configuration value must be an integer value in
+ * the range [0, UINT64_MAX].
+ * 2) The parameter schema may contain a key with the name
+ * REQUIRED_SCHEMA_NAME. This key is optional. If this key is present, the
+ * value associated with it must have a boolean type. If the value is true,
+ * then the parameter specified by this schema is a required parameter. All
+ * valid configurations must include all required parameters.
+ * 3) The parameter schema may contain a key with the name DEFAULT_SCHEMA_NAME.
+ * This key is optional. This key must not be present if the parameter
+ * specified by this schema is required. If this key is present, the value
+ * associated with the parent key must follow all restrictions specified by
+ * the type specified by this schema. If a configuration does not supply a
+ * value for the parameter specified by this schema, then the kernel will
+ * apply the value associated with this key in its place.
+ *
+ * The following is an example of a valid schema, as printed by nvlist_dump.
+ * Keys are printed followed by the type of the value in parentheses. The
+ * value is displayed following a colon. The indentation level reflects the
+ * level of nesting of nvlists. String values are displayed between []
+ * brackets. Binary values are shown with the length of the binary value (in
+ * bytes) followed by the actual binary values.
+ *
+ * PF (NVLIST):
+ * IOV (NVLIST):
+ * num_vfs (NVLIST):
+ * type (STRING): [uint16_t]
+ * required (BOOL): TRUE
+ * device (NVLIST):
+ * type (STRING): [string]
+ * required (BOOL): TRUE
+ * DRIVER (NVLIST):
+ * VF (NVLIST):
+ * IOV (NVLIST):
+ * passthrough (NVLIST):
+ * type (STRING): [bool]
+ * default (BOOL): FALSE
+ * DRIVER (NVLIST):
+ * mac-addr (NVLIST):
+ * type (STRING): [mac-addr]
+ * default (BINARY): 6 000000000000
+ * vlan (NVLIST):
+ * type (STRING): [uint16_t]
+ * spoof-check (NVLIST):
+ * type (STRING): [bool]
+ * default (BOOL): TRUE
+ * allow-set-mac (NVLIST):
+ * type (STRING): [bool]
+ * default (BOOL): FALSE
+ */
+struct pci_iov_schema
+{
+ void *schema;
+ size_t len;
+ int error;
+};
+
+/*
+ * SR-IOV configuration is passed to the kernel as a packed nvlist. See nv(3)
+ * for the details of the nvlist API. The expected format of the nvlist is:
+ *
+ * BASIC RULES
+ * 1) All keys are case-insensitive.
+ * 2) No keys that are not specified below may exist at any level of the
+ * config nvlist.
+ * 3) Unless otherwise specified, all keys are optional. It should go without
+ * saying that a key being mandatory is transitive: that is, if a key is
+ * specified to contain a sub-node that contains a mandatory key, then
+ * the outer key is implicitly mandatory. If a key is mandatory then the
+ * associated value is also mandatory.
+ * 4) Order of keys is irrelevant.
+ *
+ * TOP LEVEL OF CONFIG NVLIST
+ * 1) All keys specified in this section are mandatory.
+ * 2) There must be a top-level key with the name PF_CONFIG_NAME. The value
+ * associated is an nvlist that follows the "device node" format. The
+ * parameters in this node specify parameters that apply to the PF.
+ * 3) For every VF being configured (this is set via the "num_vfs" parameter
+ * in the PF section), there must be a top-level key whose name is VF_PREFIX
+ * immediately followed by the index of the VF as a decimal integer. For
+ * example, this would be VF-0 for the first VF. VFs are numbered starting
+ * from 0. The value associated with this key follows the "device node"
+ * format. The parameters in this node specify configuration that applies
+ * to the VF specified in the key. Leading zeros are not permitted in VF
+ * index. Configuration for the second VF must be specified in a node with
+ * the key VF-1. VF-01 is not a valid key.
+ *
+ * DEVICE NODES
+ * 1) All keys specified in this section are mandatory.
+ * 2) The device node must contain a key with the name DRIVER_CONFIG_NAME. The
+ * value associated with this key is an nvlist following the subsystem node
+ * format. The parameters in this key specify configuration that is specific
+ * to a particular device driver.
+ * 3) The device node must contain a key with the name IOV_CONFIG_NAME. The
+ * value associated with this key is an nvlist following the subsystem node
+ * format. The parameters in this key specify configuration that is consumed
+ * by the SR-IOV infrastructure.
+ *
+ * SUBSYSTEM NODES
+ * 1) A subsystem node specifies configuration parameters that apply to a
+ * particular subsystem (driver or infrastructure) of a particular device
+ * (PF or individual VF).
+ * Note: We will refer to the section of the configuration schema that
+ * specifies the parameters for this subsystem and device
+ * configuration as the device/subsystem schema.
+ * 2) The subsystem node must contain only keys that correspond to parameters
+ * that are specified in the device/subsystem schema.
+ * 3) Every parameter specified as required in the device/subsystem schema is
+ * a mandatory key in the subsystem node.
+ * Note: All parameters that are not required in device/subsystem schema are
+ * optional keys. In particular, any parameter specified to have a
+ * default value in the device/subsystem schema is optional. The
+ * kernel is responsible for applying default values.
+ * 4) The value of every parameter in the device node must conform to the
+ * restrictions of the type specified for that parameter in the device/
+ * subsystem schema.
+ *
+ * The following is an example of a valid configuration, when validated against
+ * the schema example given above.
+ *
+ * PF (NVLIST):
+ * driver (NVLIST):
+ * iov (NVLIST):
+ * num_vfs (NUMBER): 3 (3) (0x3)
+ * device (STRING): [ix0]
+ * VF-0 (NVLIST):
+ * driver (NVLIST):
+ * vlan (NUMBER): 1000 (1000) (0x3e8)
+ * iov (NVLIST):
+ * passthrough (BOOL): TRUE
+ * VF-1 (NVLIST):
+ * driver (NVLIST):
+ * iov (NVLIST):
+ * VF-2 (NVLIST):
+ * driver (NVLIST):
+ * mac-addr (BINARY): 6 020102030405
+ * iov (NVLIST):
+ */
+struct pci_iov_arg
+{
+ void *config;
+ size_t len;
+};
+
+#define IOV_CONFIG _IOW('p', 10, struct pci_iov_arg)
+#define IOV_DELETE _IO('p', 11)
+#define IOV_GET_SCHEMA _IOWR('p', 12, struct pci_iov_schema)
+
+#endif
+
Index: sys/sys/iov_schema.h
===================================================================
--- /dev/null
+++ sys/sys/iov_schema.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/sys/sys/iov_schema.h 279451 2015-03-01 00:40:42Z rstone $
+ */
+
+#ifndef _SYS_IOV_SCHEMA_H_
+#define _SYS_IOV_SCHEMA_H_
+
+#define IOV_SCHEMA_HASDEFAULT (1 << 0)
+#define IOV_SCHEMA_REQUIRED (1 << 1)
+
+nvlist_t *pci_iov_schema_alloc_node(void);
+
+void pci_iov_schema_add_bool(nvlist_t *schema, const char *name,
+ uint32_t flags, int defaultVal);
+void pci_iov_schema_add_string(nvlist_t *schema, const char *name,
+ uint32_t flags, const char *defaultVal);
+void pci_iov_schema_add_uint8(nvlist_t *schema, const char *name,
+ uint32_t flags, uint8_t defaultVal);
+void pci_iov_schema_add_uint16(nvlist_t *schema, const char *name,
+ uint32_t flags, uint16_t defaultVal);
+void pci_iov_schema_add_uint32(nvlist_t *schema, const char *name,
+ uint32_t flags, uint32_t defaultVal);
+void pci_iov_schema_add_uint64(nvlist_t *schema, const char *name,
+ uint32_t flags, uint64_t defaultVal);
+void pci_iov_schema_add_unicast_mac(nvlist_t *schema, const char *name,
+ uint32_t flags, const uint8_t * defaultVal);
+
+#endif
Index: usr.sbin/Makefile
===================================================================
--- usr.sbin/Makefile
+++ usr.sbin/Makefile
@@ -35,6 +35,7 @@
i2c \
ifmcstat \
iostat \
+ iovctl \
kldxref \
mailwrapper \
makefs \
Index: usr.sbin/iovctl/Makefile
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/Makefile
@@ -0,0 +1,20 @@
+# $FreeBSD$
+
+PROG= iovctl
+SRCS= iovctl.c parse.c validate.c
+
+DPADD= ${LIBNV} ${LIBUCL} ${LIBM}
+LDADD= -lnv -lucl -lm
+USEPRIVATELIB= ucl
+
+CFLAGS+=-I${.CURDIR}/../../contrib/libucl/include
+
+WARNS?=6
+
+MAN= \
+ iovctl.8 \
+ iovctl.conf.5 \
+
+.include <bsd.own.mk>
+.include <bsd.prog.mk>
+
Index: usr.sbin/iovctl/iovctl.h
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/iovctl.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: head/usr.sbin/iovctl/iovctl.h 279461 2015-03-01 00:52:41Z rstone $
+ */
+
+#ifndef IOVCTL_H
+#define IOVCTL_H
+
+char * find_device(const char *);
+nvlist_t * parse_config_file(const char *, const nvlist_t *);
+void validate_config(nvlist_t *, const nvlist_t *, const regex_t *);
+
+#endif
+
Index: usr.sbin/iovctl/iovctl.8
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/iovctl.8
@@ -0,0 +1,123 @@
+.\"
+.\" Copyright (c) 2014 Sandvine Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: head/usr.sbin/iovctl/iovctl.8 285273 2015-07-08 16:16:44Z pkelsey $
+.\"
+.Dd July 8, 2015
+.Dt IOVCTL 8
+.Os
+.Sh NAME
+.Nm iovctl
+.Nd "PCI SR-IOV configuration utility"
+.Sh SYNOPSIS
+.Nm
+.Fl C
+.Op Fl f Ar config-file
+.Op Fl n
+.Nm
+.Fl D
+.Op Fl f Ar config-file | Fl d Ar device
+.Op Fl n
+.Nm
+.Fl S
+.Op Fl f Ar config-file | Fl d Ar device
+.Sh DESCRIPTION
+The
+.Nm
+utility creates or destroys PCI Single-Root I/O Virtualization
+.Pq SR-IOV
+Virtual Functions
+.Pq VFs .
+When invoked with the
+.Fl C
+flag,
+.Nm
+creates VFs as children of the Physical Function
+.Pq PF
+configured in the specified configuration file.
+When invoked with the
+.Fl D
+flag,
+.Nm
+destroys all VFs that are children of the specified device.
+Available PF devices can be seen in
+.Pa /dev/iov/ .
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl C
+Enable SR-IOV on the specified PF device and create VF children.
+This operation will fail if the PF already has VF children.
+This option must be used in conjunction with the
+.Fl f
+option.
+.It Fl d Ar device
+Specify the PF device to use for the given operation.
+.Ar device
+may either be the name of a PF device, or a full path name to a node in
+.Pa /dev/iov/ .
+This option may not be used with the
+.Fl C
+option.
+.It Fl D
+Delete all VF children of the specified PF device.
+This operation will fail if SR-IOV is not currently enabled on the specified
+device.
+.It Fl f Ar config-file
+Specify the pathname of the configuration file.
+For the
+.Fl C
+option, this file will be used to specify all configuration values.
+For the
+.Fl D
+and
+.Fl S
+options, this file will only be used to specify the name of the PF device.
+.Pp
+See
+.Xr iovctl.conf 5
+for a description of the config file format and documentation of the
+configuration parameters that apply to all PF drivers.
+See the PF driver manual page for configuration parameters specific to
+particular hardware.
+.It Fl n
+Perform a dry-run.
+Perform all validation of the specified action and print what would be done,
+but do not perform the actual creation or destruction of VFs.
+This option may not be used with the
+.Fl S
+flag.
+.It Fl S
+Read the configuration schema from the specified device and print its contents
+to stdout.
+This action may be used to discover the configuration parameters supported on
+a given PF device.
+.El
+.Sh SEE ALSO
+.Xr iovctl.conf 5 ,
+.Xr rc.conf 5
+.Sh AUTHORS
+This manual page was written by
+.An Ryan Stone Aq Mt rstone@FreeBSD.org .
Index: usr.sbin/iovctl/iovctl.c
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/iovctl.c
@@ -0,0 +1,403 @@
+/*-
+ * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/iovctl/iovctl.c 285063 2015-07-02 21:58:10Z oshogbo $");
+
+#include <sys/param.h>
+#include <sys/iov.h>
+#include <sys/dnv.h>
+#include <sys/nv.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "iovctl.h"
+
+/* Handlers for the -C, -D and -S actions; they are defined after main(). */
+static void config_action(const char *filename, int dryrun);
+static void delete_action(const char *device, int dryrun);
+static void print_schema(const char *device);
+
+/*
+ * Retrieve the device's configuration schema from the kernel.
+ *
+ * IOV_GET_SCHEMA is issued twice on fd: once with no buffer, so that the
+ * required size comes back in the len field, and once more with a buffer of
+ * that size to receive the packed schema.  The packed data is then unpacked
+ * into an nvlist, which is returned to the caller.
+ *
+ * Every failure terminates the program through err(3).
+ */
+static nvlist_t *
+get_schema(int fd)
+{
+	struct pci_iov_schema req;
+	nvlist_t *unpacked;
+
+	/* Sizing pass: no buffer is supplied, only the length is wanted. */
+	req.error = 0;
+	req.len = 0;
+	req.schema = NULL;
+	if (ioctl(fd, IOV_GET_SCHEMA, &req) != 0)
+		err(1, "Could not fetch size of config schema");
+
+	req.schema = malloc(req.len);
+	if (req.schema == NULL)
+		err(1, "Could not allocate %zu bytes for schema",
+		    req.len);
+
+	/*
+	 * Fetch pass.  A failure can be signalled either by ioctl() itself
+	 * or through req.error; in the latter case copy it into errno so
+	 * that err() prints the matching message.
+	 */
+	if (ioctl(fd, IOV_GET_SCHEMA, &req) != 0 || req.error != 0) {
+		if (req.error != 0)
+			errno = req.error;
+		err(1, "Could not fetch config schema");
+	}
+
+	unpacked = nvlist_unpack(req.schema, req.len, NV_FLAG_IGNORE_CASE);
+	if (unpacked == NULL)
+		err(1, "Could not unpack schema");
+
+	/* The packed buffer is no longer needed once it has been unpacked. */
+	free(req.schema);
+	return (unpacked);
+}
+
+/*
+ * Pack the given configuration and hand it to the kernel with IOV_CONFIG,
+ * which activates SR-IOV and creates the VFs.
+ *
+ * When dryrun is non-zero the ioctl is skipped; instead the device name, the
+ * configuration and its packed size are printed to stdout.  Failures exit
+ * through err(3).
+ */
+static void
+config_iov(int fd, const char *dev_name, const nvlist_t *config, int dryrun)
+{
+	struct pci_iov_arg req;
+
+	req.config = nvlist_pack(config, &req.len);
+	if (req.config == NULL)
+		err(1, "Could not pack configuration");
+
+	if (!dryrun) {
+		if (ioctl(fd, IOV_CONFIG, &req) != 0)
+			err(1, "Failed to configure SR-IOV");
+		free(req.config);
+		return;
+	}
+
+	/* Dry run: describe what would have been sent to the kernel. */
+	printf("Would enable SR-IOV on device '%s'.\n", dev_name);
+	printf("The following configuration parameters would be used:\n");
+	nvlist_fdump(config, stdout);
+	printf("The configuration parameters consume %zu bytes when packed.\n",
+	    req.len);
+
+	free(req.config);
+}
+
+/*
+ * Open the PF device identified by dev_name for reading and writing.
+ *
+ * A name that begins with '/' is used verbatim as a path; any other name is
+ * looked up under /dev/iov/.  Returns the open file descriptor.  Every
+ * failure exits through err(3) or errx(3).
+ */
+static int
+open_device(const char *dev_name)
+{
+	char *path;
+	size_t capacity, needed;
+	long limit;
+	int fd;
+
+	limit = pathconf("/dev", _PC_PATH_MAX);
+	if (limit < 0)
+		err(1, "Could not get maximum path length");
+
+	capacity = limit;
+	path = malloc(capacity);
+	if (path == NULL)
+		err(1, "Could not allocate memory for device path");
+
+	if (dev_name[0] != '/')
+		needed = snprintf(path, capacity, "/dev/iov/%s", dev_name);
+	else
+		needed = strlcpy(path, dev_name, capacity);
+
+	/* Both calls return the untruncated length, excluding the NUL. */
+	if (needed >= capacity)
+		errx(1, "Provided file name too long");
+
+	fd = open(path, O_RDWR);
+	if (fd < 0)
+		err(1, "Could not open device '%s'", path);
+
+	free(path);
+	return (fd);
+}
+
+/*
+ * Print the three supported command-line forms through warnx(3) and exit
+ * with status 1.  This function does not return.
+ */
+static void
+usage(void)
+{
+
+ warnx("Usage: iovctl -C -f <config file> [-n]");
+ warnx(" iovctl -D [-d <PF device> | -f <config file>] [-n]");
+ warnx(" iovctl -S [-d <PF device> | -f <config file>]");
+ exit(1);
+
+}
+
+/* The operation selected on the command line; main() allows only one. */
+enum main_action {
+ NONE, /* no action flag seen yet */
+ CONFIG, /* -C */
+ DELETE, /* -D */
+ PRINT_SCHEMA, /* -S */
+};
+
+/*
+ * Entry point: parse the command line, check that the flag combination is
+ * valid and dispatch to the handler for the selected action.
+ *
+ * Exactly one of -C, -D and -S selects the action, and exactly one of -d and
+ * -f names the PF device (directly, or through a config file).  -C requires
+ * -f, and -n may not be combined with -S.  Any violation prints a diagnostic
+ * followed by the usage text and exits with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+	char *device;
+	const char *filename;
+	int ch, dryrun;
+	enum main_action action;
+
+	device = NULL;
+	filename = NULL;
+	dryrun = 0;
+	action = NONE;
+
+	while ((ch = getopt(argc, argv, "Cd:Df:nS")) != -1) {
+		switch (ch) {
+		case 'C':
+			if (action != NONE) {
+				warnx(
+				   "Only one of -C, -D or -S may be specified");
+				usage();
+			}
+			action = CONFIG;
+			break;
+		case 'd':
+			/*
+			 * The name is copied because the DELETE and
+			 * PRINT_SCHEMA paths free() it.  If -d is repeated,
+			 * the last one wins and the earlier copy is released.
+			 */
+			free(device);
+			device = strdup(optarg);
+			if (device == NULL)
+				err(1,
+				    "Could not allocate memory for device name");
+			break;
+		case 'D':
+			if (action != NONE) {
+				warnx(
+				   "Only one of -C, -D or -S may be specified");
+				usage();
+			}
+			action = DELETE;
+			break;
+		case 'f':
+			filename = optarg;
+			break;
+		case 'n':
+			dryrun = 1;
+			break;
+		case 'S':
+			if (action != NONE) {
+				warnx(
+				   "Only one of -C, -D or -S may be specified");
+				usage();
+			}
+			action = PRINT_SCHEMA;
+			break;
+		case '?':
+			/* warnx() supplies the newline itself. */
+			warnx("Unrecognized argument '-%c'", optopt);
+			usage();
+			break;
+		}
+	}
+
+	if (device != NULL && filename != NULL) {
+		warnx("Only one of the -d and -f flags may be specified");
+		usage();
+	}
+
+	if (device == NULL && filename == NULL) {
+		warnx("Either the -d or -f flag must be specified");
+		usage();
+	}
+
+	switch (action) {
+	case CONFIG:
+		/* The checks above guarantee that -d was given instead. */
+		if (filename == NULL) {
+			warnx("-d flag cannot be used with the -C flag");
+			usage();
+		}
+		config_action(filename, dryrun);
+		break;
+	case DELETE:
+		if (device == NULL)
+			device = find_device(filename);
+		delete_action(device, dryrun);
+		free(device);
+		break;
+	case PRINT_SCHEMA:
+		if (dryrun) {
+			warnx("-n flag cannot be used with the -S flag");
+			usage();
+		}
+		if (device == NULL)
+			device = find_device(filename);
+		print_schema(device);
+		free(device);
+		break;
+	default:
+		/* No action flag was given. */
+		usage();
+		break;
+	}
+
+	exit(0);
+}
+
+/*
+ * Implement -C: look up the PF device named in the config file, fetch its
+ * schema, parse and validate the file against that schema, and pass the
+ * result to config_iov().  dryrun is forwarded to config_iov() unchanged.
+ */
+static void
+config_action(const char *filename, int dryrun)
+{
+	nvlist_t *cfg, *dev_schema;
+	char *path;
+	int fd;
+
+	/* find_device() hands back a full /dev/iov/ path. */
+	path = find_device(filename);
+	fd = open(path, O_RDWR);
+	if (fd < 0)
+		err(1, "Could not open device '%s'", path);
+
+	dev_schema = get_schema(fd);
+	cfg = parse_config_file(filename, dev_schema);
+	if (cfg == NULL)
+		errx(1, "Could not parse config");
+
+	config_iov(fd, path, cfg, dryrun);
+
+	nvlist_destroy(cfg);
+	nvlist_destroy(dev_schema);
+	free(path);
+	close(fd);
+}
+
+/*
+ * Implement -D: open the PF device and issue IOV_DELETE on it.  With dryrun
+ * set, the device is still opened but only a description of the operation is
+ * printed.
+ */
+static void
+delete_action(const char *dev_name, int dryrun)
+{
+	int fd;
+
+	fd = open_device(dev_name);
+
+	if (!dryrun) {
+		if (ioctl(fd, IOV_DELETE) != 0)
+			err(1, "Failed to delete VFs");
+	} else {
+		printf("Would attempt to delete all VF children of '%s'\n",
+		    dev_name);
+	}
+
+	close(fd);
+}
+
+/*
+ * Print the default value stored in a schema parameter node, formatted
+ * according to the parameter's type name.  An unrecognised type name is a
+ * fatal error.
+ */
+static void
+print_default_value(const nvlist_t *parameter, const char *type)
+{
+	const uint8_t *octets;
+	size_t octets_len;
+
+	if (strcasecmp(type, "bool") == 0) {
+		printf(" (default = %s)",
+		    nvlist_get_bool(parameter, DEFAULT_SCHEMA_NAME) ?
+		    "true" : "false");
+	} else if (strcasecmp(type, "string") == 0) {
+		printf(" (default = %s)",
+		    nvlist_get_string(parameter, DEFAULT_SCHEMA_NAME));
+	} else if (strcasecmp(type, "uint8_t") == 0 ||
+	    strcasecmp(type, "uint16_t") == 0 ||
+	    strcasecmp(type, "uint32_t") == 0 ||
+	    strcasecmp(type, "uint64_t") == 0) {
+		/* Every unsigned width is stored and printed the same way. */
+		printf(" (default = %ju)",
+		    (uintmax_t)nvlist_get_number(parameter,
+		    DEFAULT_SCHEMA_NAME));
+	} else if (strcasecmp(type, "unicast-mac") == 0) {
+		octets = nvlist_get_binary(parameter, DEFAULT_SCHEMA_NAME,
+		    &octets_len);
+		printf(" (default = %02x:%02x:%02x:%02x:%02x:%02x)", octets[0],
+		    octets[1], octets[2], octets[3], octets[4], octets[5]);
+	} else {
+		errx(1, "Unexpected type in schema: '%s'", type);
+	}
+}
+
+/*
+ * Print one line per parameter in a subsystem schema: the parameter name and
+ * type, followed by "(required)", its default value, or "(optional)".
+ */
+static void
+print_subsystem_schema(const nvlist_t * subsystem_schema)
+{
+	const nvlist_t *param;
+	const char *param_name, *param_type;
+	void *cookie;
+	int nvtype;
+
+	for (cookie = NULL;
+	    (param_name = nvlist_next(subsystem_schema, &nvtype, &cookie)) !=
+	    NULL;) {
+		param = nvlist_get_nvlist(subsystem_schema, param_name);
+		param_type = nvlist_get_string(param, TYPE_SCHEMA_NAME);
+
+		printf("\t%s : %s", param_name, param_type);
+
+		/* A required parameter takes precedence over a default. */
+		if (dnvlist_get_bool(param, REQUIRED_SCHEMA_NAME, false)) {
+			printf(" (required)");
+		} else if (nvlist_exists(param, DEFAULT_SCHEMA_NAME)) {
+			print_default_value(param, param_type);
+		} else {
+			printf(" (optional)");
+		}
+
+		printf("\n");
+	}
+}
+
+/*
+ * Implement -S: fetch the schema from the PF device and print the parameters
+ * accepted for the PF, then those accepted for a VF.  Within each group the
+ * IOV subsystem parameters are listed before the driver parameters.
+ */
+static void
+print_schema(const char *dev_name)
+{
+	const nvlist_t *pf_node, *pf_iov, *pf_driver;
+	const nvlist_t *vf_node, *vf_iov, *vf_driver;
+	nvlist_t *root;
+	int fd;
+
+	fd = open_device(dev_name);
+	root = get_schema(fd);
+
+	pf_node = nvlist_get_nvlist(root, PF_CONFIG_NAME);
+	pf_iov = nvlist_get_nvlist(pf_node, IOV_CONFIG_NAME);
+	pf_driver = nvlist_get_nvlist(pf_node, DRIVER_CONFIG_NAME);
+	printf(
+"The following configuration parameters may be configured on the PF:\n");
+	print_subsystem_schema(pf_iov);
+	print_subsystem_schema(pf_driver);
+
+	vf_node = nvlist_get_nvlist(root, VF_SCHEMA_NAME);
+	vf_iov = nvlist_get_nvlist(vf_node, IOV_CONFIG_NAME);
+	vf_driver = nvlist_get_nvlist(vf_node, DRIVER_CONFIG_NAME);
+	printf(
+"\nThe following configuration parameters may be configured on a VF:\n");
+	print_subsystem_schema(vf_iov);
+	print_subsystem_schema(vf_driver);
+
+	nvlist_destroy(root);
+	close(fd);
+}
Index: usr.sbin/iovctl/iovctl.conf.5
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/iovctl.conf.5
@@ -0,0 +1,171 @@
+.\"
+.\" Copyright (c) 2014 Sandvine Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD: head/usr.sbin/iovctl/iovctl.conf.5 285273 2015-07-08 16:16:44Z pkelsey $
+.\"
+.Dd July 8, 2015
+.Dt IOVCTL.CONF 5
+.Os
+.Sh NAME
+.Nm iovctl.conf
+.Nd IOVCTL configuration file
+.Sh DESCRIPTION
+The
+.Nm
+file is the configuration file for the
+.Xr iovctl 8
+program.
+This file specifies configuration parameters for a single Physical Function
+.Pq PF
+device.
+To configure SR-IOV on multiple PF devices, use one configuration file for each
+PF.
+The locations of all
+.Xr iovctl 8
+configuration files are specified in
+.Xr rc.conf 5 .
+.Pp
+The
+.Nm
+file uses UCL format.
+UCL syntax is documented at the official UCL website:
+http://github.com/vstakhov/libucl.
+.Pp
+There are three types of sections in the
+.Nm
+file.
+A section is a key at the top level of the file with a list as its value.
+The list may contain the keys specified in the
+.Sx OPTIONS
+section of this manual page.
+Individual PF driver implementations may specify additional device-specific
+configuration keys that they will accept.
+The order in which sections appear in
+.Nm
+is ignored.
+No two sections may have the same key.
+For example, two sections for VF-1 must not be defined.
+.Pp
+The first section type is the PF section.
+This section always has the key "PF"; therefore, only one such section may be
+defined.
+This section defines configuration parameters that apply to the PF as a whole.
+.Pp
+The second section type is the VF section.
+This section has the key "VF-" followed by a VF index.
+VF indices start at 0 and always increment by 1.
+Valid VF indices are in the range of 0 to
+.Pq num_vfs - 1 .
+The VF index must be given as a decimal integer with no leading zeros.
+This section defines configuration parameters that apply to a single VF.
+.Pp
+The third section type is the default section.
+This section always has the key "DEFAULT"; therefore, only one such section may
+be specified.
+This section defines default configuration parameters that apply to all VFs.
+All configuration keys that are valid to be applied to a VF are valid in this
+section.
+An individual VF section may override a default specified in this section by
+providing a different value for the configuration parameter.
+Note that the default section applies to ALL VFs.
+The default section must appear before any VF sections.
+The default section may appear before or after the PF section.
+.Pp
+The following option types are supported:
+.Bl -tag -width indent
+.It boolean
+Accepts a boolean value of true or false.
+.It mac-addr
+Accepts a unicast MAC address specified as a string of the form
+xx:xx:xx:xx:xx:xx, where xx is one or two hexadecimal digits.
+.It string
+Accepts any string value.
+.It uint8_t
+Accepts any integer in the range 0 to 255, inclusive.
+.It uint16_t
+Accepts any integer in the range 0 to 65535, inclusive.
+.It uint32_t
+Accepts any integer in the range 0 to
+.Pq 2**32 - 1 ,
+inclusive.
+.It uint64_t
+Accepts any integer in the range 0 to
+.Pq 2**64 - 1 ,
+inclusive.
+.El
+.Sh OPTIONS
+The following parameters are accepted by all PF drivers:
+.Bl -tag -width indent
+.It device Pq string
+This parameter specifies the name of the PF device.
+This parameter is required to be specified.
+.It num_vfs Pq uint16_t
+This parameter specifies the number of VF children to create.
+This parameter may not be zero.
+The maximum value of this parameter is device-specific.
+.El
+.Pp
+The following parameters are accepted by all VFs:
+.Bl -tag -width indent
+.It passthrough Pq boolean
+This parameter controls whether the VF is reserved for the use of the
+.Xr bhyve 8
+hypervisor as a PCI passthrough device.
+If this parameter is set to true, then the VF will be reserved as a PCI
+passthrough device and it will not be accessible from the host OS.
+The default value of this parameter is false.
+.El
+.Pp
+See the PF driver manual page for configuration parameters specific to
+particular hardware.
+.Sh EXAMPLES
+This sample file will create 3 VFs as children of the ix0 device.
+VF-1 and VF-2 are set as
+.Xr bhyve 8
+passthrough devices through the use of the default section.
+VF-0 is not configured as a passthrough device as it explicitly overrides the
+default.
+VF-0 also sets a device-specific parameter named mac-addr.
+.Bd -literal -offset indent
+PF {
+ device : "ix0";
+ num_vfs : 3;
+}
+
+DEFAULT {
+ passthrough : true;
+}
+
+VF-0 {
+ mac-addr : "02:56:48:7e:d9:f7";
+ passthrough : false;
+}
+.Ed
+.Sh SEE ALSO
+.Xr rc.conf 5 ,
+.Xr iovctl 8
+.Sh AUTHORS
+This manual page was written by
+.An Ryan Stone Aq Mt rstone@FreeBSD.org .
Index: usr.sbin/iovctl/parse.c
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/parse.c
@@ -0,0 +1,416 @@
+/*-
+ * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/iovctl/parse.c 285063 2015-07-02 21:58:10Z oshogbo $");
+
+#include <sys/param.h>
+#include <sys/iov.h>
+#include <sys/nv.h>
+#include <net/ethernet.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <regex.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucl.h>
+#include <unistd.h>
+
+#include "iovctl.h"
+
+/*
+ * Report that the value given for key does not have the type required by the
+ * schema, then exit.  This function does not return.
+ *
+ * The offending value is usually not a UCL string (for example an integer
+ * where a bool was expected), and ucl_object_tostring() yields NULL for such
+ * objects.  ucl_object_tostring_forced() is used instead so that the message
+ * shows the value as written, with a placeholder in case even that fails.
+ */
+static void
+report_config_error(const char *key, const ucl_object_t *obj, const char *type)
+{
+	const char *shown;
+
+	shown = ucl_object_tostring_forced(obj);
+	if (shown == NULL)
+		shown = "(unprintable)";
+
+	errx(1, "Value '%s' of key '%s' is not of type %s",
+	    shown, key, type);
+}
+
+/*
+ * Store the config file value for key as a boolean in config.  A value that
+ * libucl does not accept as a boolean is reported as a type error.
+ */
+static void
+add_bool_config(const char *key, const ucl_object_t *obj, nvlist_t *config)
+{
+	bool flag, converted;
+
+	converted = ucl_object_toboolean_safe(obj, &flag);
+	if (!converted)
+		report_config_error(key, obj, "bool");
+
+	nvlist_add_bool(config, key, flag);
+}
+
+/*
+ * Store the config file value for key as a string in config.  A value that
+ * libucl does not accept as a string is reported as a type error.
+ */
+static void
+add_string_config(const char *key, const ucl_object_t *obj, nvlist_t *config)
+{
+	const char *text;
+	bool converted;
+
+	converted = ucl_object_tostring_safe(obj, &text);
+	if (!converted)
+		report_config_error(key, obj, "string");
+
+	nvlist_add_string(config, key, text);
+}
+
+/*
+ * Store the config file value for key as an unsigned number in config.  The
+ * value must be an integer in the range 0 to max inclusive; anything else is
+ * reported as a type error naming the given type.
+ */
+static void
+add_uint_config(const char *key, const ucl_object_t *obj, nvlist_t *config,
+    const char *type, uint64_t max)
+{
+	int64_t parsed;
+
+	/* libucl only offers a signed conversion, so range-check by hand. */
+	if (!ucl_object_toint_safe(obj, &parsed))
+		report_config_error(key, obj, type);
+
+	if (parsed < 0 || (uint64_t)parsed > max)
+		report_config_error(key, obj, type);
+
+	nvlist_add_number(config, key, (uint64_t)parsed);
+}
+
+/*
+ * Verifies that the value specified in the config file is a unicast MAC
+ * address, and then adds the value to the configuration.
+ *
+ * The value must be a string of ETHER_ADDR_LEN colon-separated fields, each
+ * one or two hexadecimal digits.  Malformed input is reported as a type
+ * error, and a multicast address is rejected with its own message; both
+ * exit the program.
+ */
+static void
+add_unicast_mac_config(const char *key, const ucl_object_t *obj, nvlist_t *config)
+{
+	uint8_t mac[ETHER_ADDR_LEN];
+	const char *val, *token;
+	char *parse, *orig_parse, *tokpos, *endpos;
+	size_t len;
+	u_long value;
+	int i;
+
+	if (!ucl_object_tostring_safe(obj, &val))
+		report_config_error(key, obj, "unicast-mac");
+
+	/*
+	 * strtok_r() writes into its input, so tokenize a private copy.  The
+	 * copy must exist before the loop: calling strtok_r() with a NULL
+	 * string and an unset save pointer is undefined.
+	 */
+	parse = strdup(val);
+	if (parse == NULL)
+		err(1, "Could not allocate memory for MAC address");
+	orig_parse = parse;
+
+	i = 0;
+	while ((token = strtok_r(parse, ":", &tokpos)) != NULL) {
+		/* Later calls continue from the saved position. */
+		parse = NULL;
+
+		len = strlen(token);
+		if (len < 1 || len > 2)
+			report_config_error(key, obj, "unicast-mac");
+
+		value = strtoul(token, &endpos, 16);
+
+		/* Reject a field with trailing non-hexadecimal characters. */
+		if (*endpos != '\0')
+			report_config_error(key, obj, "unicast-mac");
+
+		if (value > UINT8_MAX)
+			report_config_error(key, obj, "unicast-mac");
+
+		/* Too many fields; checked before the store into mac[]. */
+		if (i >= ETHER_ADDR_LEN)
+			report_config_error(key, obj, "unicast-mac");
+
+		mac[i] = value;
+		i++;
+	}
+
+	free(orig_parse);
+
+	/* Too few fields. */
+	if (i != ETHER_ADDR_LEN)
+		report_config_error(key, obj, "unicast-mac");
+
+	if (ETHER_IS_MULTICAST(mac))
+		errx(1, "Value '%s' of key '%s' is a multicast address",
+		    ucl_object_tostring(obj), key);
+
+	nvlist_add_binary(config, key, mac, ETHER_ADDR_LEN);
+}
+
+/*
+ * Validates that the given configuration value has the type named in its
+ * schema entry, and then adds the value to the configuration node.  A type
+ * name that is not recognised is a fatal error.
+ */
+static void
+add_config(const char *key, const ucl_object_t *obj, nvlist_t *config,
+    const nvlist_t *schema)
+{
+	/* Unsigned integer types and the largest value each one accepts. */
+	static const struct {
+		const char *name;
+		uint64_t limit;
+	} uint_types[] = {
+		{ "uint8_t", UINT8_MAX },
+		{ "uint16_t", UINT16_MAX },
+		{ "uint32_t", UINT32_MAX },
+		{ "uint64_t", UINT64_MAX },
+	};
+	const char *type;
+	size_t i;
+
+	type = nvlist_get_string(schema, TYPE_SCHEMA_NAME);
+
+	if (strcasecmp(type, "bool") == 0) {
+		add_bool_config(key, obj, config);
+		return;
+	}
+
+	if (strcasecmp(type, "string") == 0) {
+		add_string_config(key, obj, config);
+		return;
+	}
+
+	for (i = 0; i < sizeof(uint_types) / sizeof(uint_types[0]); i++) {
+		if (strcasecmp(type, uint_types[i].name) == 0) {
+			add_uint_config(key, obj, config, type,
+			    uint_types[i].limit);
+			return;
+		}
+	}
+
+	if (strcasecmp(type, "unicast-mac") == 0) {
+		add_unicast_mac_config(key, obj, config);
+		return;
+	}
+
+	errx(1, "Unexpected type '%s' in schema", type);
+}
+
+/*
+ * Parse one device section (PF, DEFAULT or VF-n) of the configuration file.
+ *
+ * Each key in the section is looked up first in the IOV part of the schema
+ * and then in the driver part; the value is type-checked and stored in the
+ * matching sub-list.  The two sub-lists are then attached to config under
+ * the section name.  A duplicate section or an unknown key is a fatal error.
+ */
+static void
+parse_device_config(const ucl_object_t *top, nvlist_t *config,
+    const char *subsystem, const nvlist_t *schema)
+{
+	const nvlist_t *drv_schema, *sriov_schema;
+	nvlist_t *drv_cfg, *sriov_cfg, *section_cfg;
+	const ucl_object_t *entry;
+	ucl_object_iter_t cursor;
+	const char *name;
+
+	if (nvlist_exists(config, subsystem))
+		errx(1, "Multiple definitions of '%s' in config file",
+		    subsystem);
+
+	drv_schema = nvlist_get_nvlist(schema, DRIVER_CONFIG_NAME);
+	sriov_schema = nvlist_get_nvlist(schema, IOV_CONFIG_NAME);
+
+	drv_cfg = nvlist_create(NV_FLAG_IGNORE_CASE);
+	if (drv_cfg == NULL)
+		err(1, "Could not allocate config nvlist");
+
+	sriov_cfg = nvlist_create(NV_FLAG_IGNORE_CASE);
+	if (sriov_cfg == NULL)
+		err(1, "Could not allocate config nvlist");
+
+	section_cfg = nvlist_create(NV_FLAG_IGNORE_CASE);
+	if (section_cfg == NULL)
+		err(1, "Could not allocate config nvlist");
+
+	for (cursor = NULL;
+	    (entry = ucl_iterate_object(top, &cursor, true)) != NULL;) {
+		name = ucl_object_key(entry);
+
+		/* The IOV schema is consulted before the driver schema. */
+		if (nvlist_exists_nvlist(sriov_schema, name)) {
+			add_config(name, entry, sriov_cfg,
+			    nvlist_get_nvlist(sriov_schema, name));
+		} else if (nvlist_exists_nvlist(drv_schema, name)) {
+			add_config(name, entry, drv_cfg,
+			    nvlist_get_nvlist(drv_schema, name));
+		} else {
+			errx(1, "%s: Invalid config key '%s'", subsystem,
+			    name);
+		}
+	}
+
+	nvlist_move_nvlist(section_cfg, DRIVER_CONFIG_NAME, drv_cfg);
+	nvlist_move_nvlist(section_cfg, IOV_CONFIG_NAME, sriov_cfg);
+	nvlist_move_nvlist(config, subsystem, section_cfg);
+}
+
+/*
+ * Parses the specified config file using the given schema, and returns an
+ * nvlist containing the configuration specified by the file.
+ *
+ * Top-level keys must be the PF section, the default section, or a VF
+ * section whose name matches VF_PREFIX followed by a decimal index with no
+ * leading zeros.  The resulting configuration is passed to validate_config()
+ * before it is returned.
+ *
+ * Exits with a message to stderr and an error if any config validation fails.
+ */
+nvlist_t *
+parse_config_file(const char *filename, const nvlist_t *schema)
+{
+	ucl_object_iter_t it;
+	struct ucl_parser *parser;
+	ucl_object_t *top;
+	const ucl_object_t *obj;
+	nvlist_t *config;
+	const nvlist_t *pf_schema, *vf_schema;
+	const char *errmsg, *key;
+	regex_t vf_pat;
+	int regex_err, processed_vf;
+
+	regex_err = regcomp(&vf_pat, "^"VF_PREFIX"([1-9][0-9]*|0)$",
+	    REG_EXTENDED | REG_ICASE);
+	if (regex_err != 0)
+		errx(1, "Could not compile VF regex");
+
+	parser = ucl_parser_new(0);
+	if (parser == NULL)
+		err(1, "Could not allocate parser");
+
+	/*
+	 * ucl_parser_add_file() fails for syntax errors as well as for files
+	 * it cannot read, and errno is only meaningful in the latter case.
+	 * Prefer the parser's own diagnostic when it has one, and fall back
+	 * to errno otherwise.
+	 */
+	if (!ucl_parser_add_file(parser, filename)) {
+		errmsg = ucl_parser_get_error(parser);
+		if (errmsg != NULL)
+			errx(1, "Could not load '%s': %s", filename, errmsg);
+		err(1, "Could not open '%s' for reading", filename);
+	}
+
+	errmsg = ucl_parser_get_error(parser);
+	if (errmsg != NULL)
+		errx(1, "Could not parse '%s': %s", filename, errmsg);
+
+	config = nvlist_create(NV_FLAG_IGNORE_CASE);
+	if (config == NULL)
+		err(1, "Could not allocate config nvlist");
+
+	pf_schema = nvlist_get_nvlist(schema, PF_CONFIG_NAME);
+	vf_schema = nvlist_get_nvlist(schema, VF_SCHEMA_NAME);
+
+	processed_vf = 0;
+	top = ucl_parser_get_object(parser);
+	it = NULL;
+	while ((obj = ucl_iterate_object(top, &it, true)) != NULL) {
+		key = ucl_object_key(obj);
+
+		if (strcasecmp(key, PF_CONFIG_NAME) == 0)
+			parse_device_config(obj, config, key, pf_schema);
+		else if (strcasecmp(key, DEFAULT_SCHEMA_NAME) == 0) {
+			/*
+			 * Enforce that the default section must come before all
+			 * VF sections.  This will hopefully prevent confusing
+			 * the user by having a default value apply to a VF
+			 * that was declared earlier in the file.
+			 *
+			 * This also gives us the flexibility to extend the file
+			 * format in the future to allow for multiple default
+			 * sections that do only apply to subsequent VF
+			 * sections.
+			 */
+			if (processed_vf)
+				errx(1,
+			"'default' section must precede all VF sections");
+
+			parse_device_config(obj, config, key, vf_schema);
+		} else if (regexec(&vf_pat, key, 0, NULL, 0) == 0) {
+			processed_vf = 1;
+			parse_device_config(obj, config, key, vf_schema);
+		} else
+			errx(1, "Unexpected top-level node: %s", key);
+	}
+
+	validate_config(config, schema, &vf_pat);
+
+	ucl_object_unref(top);
+	ucl_parser_free(parser);
+	regfree(&vf_pat);
+
+	return (config);
+}
+
+/*
+ * Search the PF configuration section for the "device" parameter and return
+ * its value, or NULL if the section does not specify a device.
+ *
+ * The key is matched case-insensitively.  A "device" value that is not a
+ * string is a fatal error.  The returned string is the one libucl hands
+ * back for the object; it is not copied here.
+ */
+static const char *
+find_pf_device(const ucl_object_t *pf)
+{
+	ucl_object_iter_t it;
+	const ucl_object_t *obj;
+	const char *key, *device;
+
+	it = NULL;
+	while ((obj = ucl_iterate_object(pf, &it, true)) != NULL) {
+		key = ucl_object_key(obj);
+
+		if (strcasecmp(key, "device") == 0) {
+			/*
+			 * This is a validation failure rather than a failed
+			 * system call, so errno carries no information:
+			 * use errx() and not err().
+			 */
+			if (!ucl_object_tostring_safe(obj, &device))
+				errx(1,
+				    "Config PF.device must be a string");
+
+			return (device);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Manually parse the config file looking for the name of the PF device.  We
+ * have to do this separately because we need the config schema to call the
+ * normal config file parsing code, and we need to know the name of the PF
+ * device so that we can fetch the schema from it.
+ *
+ * The result is a newly allocated "/dev/iov/<device>" path that the caller
+ * must free().
+ *
+ * This will always exit on failure, so if it returns then it is guaranteed to
+ * have returned a valid device name.
+ */
+char *
+find_device(const char *filename)
+{
+	char *device;
+	const char *deviceName;
+	ucl_object_iter_t it;
+	struct ucl_parser *parser;
+	ucl_object_t *top;
+	const ucl_object_t *obj;
+	const char *errmsg, *key;
+	int error;
+
+	device = NULL;
+	deviceName = NULL;
+
+	parser = ucl_parser_new(0);
+	if (parser == NULL)
+		err(1, "Could not allocate parser");
+
+	/*
+	 * ucl_parser_add_file() fails for syntax errors as well as for files
+	 * it cannot read, and errno is only meaningful in the latter case.
+	 * Prefer the parser's own diagnostic when it has one, and fall back
+	 * to errno otherwise.
+	 */
+	if (!ucl_parser_add_file(parser, filename)) {
+		errmsg = ucl_parser_get_error(parser);
+		if (errmsg != NULL)
+			errx(1, "Could not load '%s': %s", filename, errmsg);
+		err(1, "Could not open '%s' for reading", filename);
+	}
+
+	errmsg = ucl_parser_get_error(parser);
+	if (errmsg != NULL)
+		errx(1, "Could not parse '%s': %s", filename, errmsg);
+
+	top = ucl_parser_get_object (parser);
+	it = NULL;
+	while ((obj = ucl_iterate_object(top, &it, true)) != NULL) {
+		key = ucl_object_key(obj);
+
+		/* Only the PF section can name the device. */
+		if (strcasecmp(key, PF_CONFIG_NAME) == 0) {
+			deviceName = find_pf_device(obj);
+			break;
+		}
+	}
+
+	if (deviceName == NULL)
+		errx(1, "Config file does not specify device");
+
+	/*
+	 * deviceName belongs to the parsed object tree, so the path must be
+	 * built before that tree is released below.
+	 */
+	error = asprintf(&device, "/dev/iov/%s", deviceName);
+	if (error < 0)
+		err(1, "Could not allocate memory for device");
+
+	ucl_object_unref(top);
+	ucl_parser_free(parser);
+
+	return (device);
+}
Index: usr.sbin/iovctl/validate.c
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/validate.c
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/usr.sbin/iovctl/validate.c 285063 2015-07-02 21:58:10Z oshogbo $");
+
+#include <sys/param.h>
+#include <sys/iov.h>
+#include <sys/dnv.h>
+#include <sys/nv.h>
+
+#include <err.h>
+#include <regex.h>
+#include <stdlib.h>
+
+#include "iovctl.h"
+
+/*
+ * Hand back a writeable nvlist holding the configuration of the named device.
+ *
+ * An existing entry is detached from config with dnvlist_take_nvlist so that
+ * the caller may modify it.  When config has no entry for the device, a new
+ * nvlist containing empty DRIVER_CONFIG_NAME and IOV_CONFIG_NAME sections is
+ * built instead.  In both cases the result is not part of config on return;
+ * the caller is responsible for re-inserting it with nvlist_move_nvlist.
+ * Failure to allocate any of the nvlists is fatal.
+ */
+static nvlist_t *
+find_config(nvlist_t *config, const char * device)
+{
+	nvlist_t *dev_cfg, *driver_sect, *iov_sect;
+
+	dev_cfg = dnvlist_take_nvlist(config, device, NULL);
+	if (dev_cfg == NULL) {
+		/* Nothing configured for this device: synthesize a skeleton. */
+		driver_sect = nvlist_create(NV_FLAG_IGNORE_CASE);
+		if (driver_sect == NULL)
+			err(1, "Could not allocate config nvlist");
+
+		iov_sect = nvlist_create(NV_FLAG_IGNORE_CASE);
+		if (iov_sect == NULL)
+			err(1, "Could not allocate config nvlist");
+
+		dev_cfg = nvlist_create(NV_FLAG_IGNORE_CASE);
+		if (dev_cfg == NULL)
+			err(1, "Could not allocate config nvlist");
+
+		nvlist_move_nvlist(dev_cfg, DRIVER_CONFIG_NAME, driver_sect);
+		nvlist_move_nvlist(dev_cfg, IOV_CONFIG_NAME, iov_sect);
+	}
+
+	return (dev_cfg);
+}
+
+/*
+ * Convert the VF index embedded in a config key into a number.
+ *
+ * key is the string that was matched against the VF pattern and matches[1]
+ * is the capture group at which the decimal digits start.  A value that does
+ * not fit in a uint16_t is rejected with a fatal error.
+ */
+static uint16_t
+parse_vf_num(const char *key, regmatch_t *matches)
+{
+	const char *digits;
+	u_long parsed;
+
+	digits = &key[matches[1].rm_so];
+	parsed = strtoul(digits, NULL, 10);
+
+	if (parsed <= UINT16_MAX)
+		return (parsed);
+
+	errx(1, "VF number %lu is too large to be valid", parsed);
+}
+
+/*
+ * Copy into one subsystem of device_config every parameter that appears in
+ * the same subsystem of device_defaults but has not been configured.
+ *
+ * The subsystem nvlist is taken out of device_config while it is being
+ * modified and moved back in at the end.  Parameters that already exist are
+ * left untouched.  The values in device_defaults are assumed to have been
+ * validated already; a value of an unexpected type is a fatal error.
+ */
+static void
+apply_subsystem_defaults(nvlist_t *device_config, const char *subsystem,
+    const nvlist_t *device_defaults)
+{
+	const nvlist_t *src;
+	nvlist_t *dst;
+	const char *param;
+	const void *blob;
+	void *iter;
+	size_t blob_len;
+	int kind;
+
+	dst = nvlist_take_nvlist(device_config, subsystem);
+	src = nvlist_get_nvlist(device_defaults, subsystem);
+
+	for (iter = NULL;
+	    (param = nvlist_next(src, &kind, &iter)) != NULL; ) {
+		/* An explicitly configured value always wins. */
+		if (nvlist_exists(dst, param))
+			continue;
+
+		if (kind == NV_TYPE_BOOL) {
+			nvlist_add_bool(dst, param,
+			    nvlist_get_bool(src, param));
+		} else if (kind == NV_TYPE_NUMBER) {
+			nvlist_add_number(dst, param,
+			    nvlist_get_number(src, param));
+		} else if (kind == NV_TYPE_STRING) {
+			nvlist_add_string(dst, param,
+			    nvlist_get_string(src, param));
+		} else if (kind == NV_TYPE_NVLIST) {
+			nvlist_add_nvlist(dst, param,
+			    nvlist_get_nvlist(src, param));
+		} else if (kind == NV_TYPE_BINARY) {
+			blob = nvlist_get_binary(src, param, &blob_len);
+			nvlist_add_binary(dst, param, blob, blob_len);
+		} else {
+			errx(1, "Unexpected type '%d'", kind);
+		}
+	}
+
+	nvlist_move_nvlist(device_config, subsystem, dst);
+}
+
+/*
+ * Fill in default values for a VF: for each subsystem (driver first, then
+ * iov), parameters missing from vf are copied from defaults.
+ *
+ * The values in defaults are assumed to have been validated already.
+ */
+static void
+apply_defaults(nvlist_t *vf, const nvlist_t *defaults)
+{
+	const char *const sections[] = {
+		DRIVER_CONFIG_NAME,
+		IOV_CONFIG_NAME,
+	};
+	size_t idx;
+
+	for (idx = 0; idx < nitems(sections); idx++)
+		apply_subsystem_defaults(vf, sections[idx], defaults);
+}
+
+/*
+ * Check one subsystem of a device against its schema: every parameter whose
+ * schema entry sets REQUIRED_SCHEMA_NAME must be present in the device's
+ * subsystem section.  The first missing parameter is reported, using
+ * config_name to identify the device, and the program exits.
+ */
+static void
+validate_subsystem(const nvlist_t *device, const nvlist_t *device_schema,
+    const char *subsystem_name, const char *config_name)
+{
+	const nvlist_t *section, *params, *param_schema;
+	const char *param;
+	void *iter;
+	int kind;
+
+	section = nvlist_get_nvlist(device, subsystem_name);
+	params = nvlist_get_nvlist(device_schema, subsystem_name);
+
+	for (iter = NULL;
+	    (param = nvlist_next(params, &kind, &iter)) != NULL; ) {
+		param_schema = nvlist_get_nvlist(params, param);
+
+		/* Parameters not flagged as required may be absent. */
+		if (!dnvlist_get_bool(param_schema, REQUIRED_SCHEMA_NAME,
+		    false))
+			continue;
+		if (nvlist_exists(section, param))
+			continue;
+
+		errx(1, "Required parameter '%s' not found in '%s'",
+		    param, config_name);
+	}
+}
+
+/*
+ * Check that a device has every required parameter configured, looking at
+ * the driver subsystem first and the iov subsystem second.  config_name is
+ * only used to identify the device in the error message.
+ */
+static void
+validate_device(const nvlist_t *device, const nvlist_t *schema,
+    const char *config_name)
+{
+	const char *const sections[] = {
+		DRIVER_CONFIG_NAME,
+		IOV_CONFIG_NAME,
+	};
+	size_t idx;
+
+	for (idx = 0; idx < nitems(sections); idx++)
+		validate_subsystem(device, schema, sections[idx],
+		    config_name);
+}
+
+/*
+ * Fetch the "num_vfs" number from the IOV_CONFIG_NAME section of the PF
+ * configuration.  The value is narrowed to the uint16_t return type.
+ */
+static uint16_t
+get_num_vfs(const nvlist_t *pf)
+{
+
+	return (nvlist_get_number(nvlist_get_nvlist(pf, IOV_CONFIG_NAME),
+	    "num_vfs"));
+}
+
+/*
+ * Validates the configuration that has been parsed into config using the given
+ * config schema.  Note that the parser is required to not insert configuration
+ * keys that are not valid in the schema, and to not insert configuration values
+ * that are of the incorrect type.  Therefore this function will not validate
+ * either condition.  This function is only responsible for inserting config
+ * file defaults in individual VF sections and removing the DEFAULT_SCHEMA_NAME
+ * subsystem from config, validating that all required parameters in the schema
+ * are present in each PF and VF subsystem, and that there is no VF subsystem
+ * section whose number exceeds num_vfs.
+ *
+ * vf_pat is the compiled pattern used to recognise VF section names; its
+ * first capture group is handed to parse_vf_num().  Any validation failure is
+ * reported with errx() and terminates the program.
+ */
+void
+validate_config(nvlist_t *config, const nvlist_t *schema, const regex_t *vf_pat)
+{
+	char device_name[VF_MAX_NAME];
+	regmatch_t matches[2];
+	nvlist_t *defaults, *pf, *vf;
+	const nvlist_t *vf_schema;
+	const char *key;
+	void *cookie;
+	int i, type;
+	uint16_t vf_num, num_vfs;
+
+	pf = find_config(config, PF_CONFIG_NAME);
+	validate_device(pf, nvlist_get_nvlist(schema, PF_CONFIG_NAME),
+	    PF_CONFIG_NAME);
+
+	/*
+	 * Read num_vfs while pf is still ours.  nvlist_move_nvlist() consumes
+	 * its argument, so pf must not be dereferenced once it has been handed
+	 * back to config.
+	 */
+	num_vfs = get_num_vfs(pf);
+	nvlist_move_nvlist(config, PF_CONFIG_NAME, pf);
+
+	vf_schema = nvlist_get_nvlist(schema, VF_SCHEMA_NAME);
+
+	if (num_vfs == 0)
+		errx(1, "PF.num_vfs must be at least 1");
+
+	/* The defaults section is removed from config for good. */
+	defaults = dnvlist_take_nvlist(config, DEFAULT_SCHEMA_NAME, NULL);
+
+	/*
+	 * Every VF in [0, num_vfs) gets a section, created empty if the config
+	 * file did not mention it, so that defaults and required-parameter
+	 * checks apply uniformly.
+	 */
+	for (i = 0; i < num_vfs; i++) {
+		snprintf(device_name, sizeof(device_name), VF_PREFIX"%d",
+		    i);
+
+		vf = find_config(config, device_name);
+
+		if (defaults != NULL)
+			apply_defaults(vf, defaults);
+
+		validate_device(vf, vf_schema, device_name);
+		nvlist_move_nvlist(config, device_name, vf);
+	}
+	nvlist_destroy(defaults);
+
+	/* Reject any VF section whose index is not below num_vfs. */
+	cookie = NULL;
+	while ((key = nvlist_next(config, &type, &cookie)) != NULL) {
+		if (regexec(vf_pat, key, nitems(matches), matches, 0) == 0) {
+			vf_num = parse_vf_num(key, matches);
+			if (vf_num >= num_vfs)
+				errx(1,
+				   "VF number %d is out of bounds (num_vfs=%d)",
+				    vf_num, num_vfs);
+		}
+	}
+}
+
Index: usr.sbin/pciconf/cap.c
===================================================================
--- usr.sbin/pciconf/cap.c
+++ usr.sbin/pciconf/cap.c
@@ -30,13 +30,14 @@
#ifndef lint
static const char rcsid[] =
- "$FreeBSD$";
+ "$FreeBSD: head/usr.sbin/pciconf/cap.c 290412 2015-11-05 20:24:56Z jhb $";
#endif /* not lint */
#include <sys/types.h>
#include <err.h>
#include <stdio.h>
+#include <strings.h>
#include <sys/agpio.h>
#include <sys/pciio.h>
@@ -640,7 +641,7 @@
printf(" %d fatal", bitcount32(sta & mask));
printf(" %d non-fatal", bitcount32(sta & ~mask));
sta = read_config(fd, &p->pc_sel, ptr + PCIR_AER_COR_STATUS, 4);
- printf(" %d corrected", bitcount32(sta));
+ printf(" %d corrected\n", bitcount32(sta));
}
static void
@@ -656,6 +657,7 @@
if ((cap1 & PCIM_VC_CAP1_LOWPRI_EXT_COUNT) != 0)
printf(" lowpri VC0-VC%d",
(cap1 & PCIM_VC_CAP1_LOWPRI_EXT_COUNT) >> 4);
+ printf("\n");
}
static void
@@ -668,7 +670,7 @@
return;
low = read_config(fd, &p->pc_sel, ptr + PCIR_SERIAL_LOW, 4);
high = read_config(fd, &p->pc_sel, ptr + PCIR_SERIAL_HIGH, 4);
- printf(" %08x%08x", high, low);
+ printf(" %08x%08x\n", high, low);
}
static void
@@ -680,7 +682,7 @@
if (ver < 1)
return;
val = read_config(fd, &p->pc_sel, ptr + 4, 4);
- printf(" ID %d", val & 0xffff);
+ printf(" ID %d\n", val & 0xffff);
}
static void
@@ -692,7 +694,69 @@
if (ver < 1)
return;
val = read_config(fd, &p->pc_sel, ptr + 8, 4);
- printf(" lane errors %#x", val);
+ printf(" lane errors %#x\n", val);
+}
+
+/*
+ * Map a flag to the word printed for it: any non-zero value yields
+ * "enabled", zero yields "disabled".
+ */
+static const char *
+check_enabled(int value)
+{
+
+	if (value != 0)
+		return ("enabled");
+	return ("disabled");
+}
+
+/*
+ * Print the SR-IOV extended capability located at config-space offset ptr of
+ * the device described by p, reading registers through fd with read_config().
+ *
+ * Output, in order: the capability version and the IOV / Memory Space / ARI
+ * control bits, the configured and supported VF counts, the first-VF offset
+ * and stride, the VF device ID, the list of supported page sizes (the ones
+ * selected in the page size register are tagged "(enabled)"), and finally
+ * each VF BAR via print_bar().
+ */
+static void
+ecap_sriov(int fd, struct pci_conf *p, uint16_t ptr, uint8_t ver)
+{
+	const char *comma, *enabled;
+	uint16_t iov_ctl, total_vfs, num_vfs, vf_offset, vf_stride, vf_did;
+	uint32_t page_caps, page_size, page_shift, page_bit;
+	unsigned long long size;
+	int i;
+
+	printf("SR-IOV %d ", ver);
+
+	iov_ctl = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_CTL, 2);
+	printf("IOV %s, Memory Space %s, ARI %s\n",
+	    check_enabled(iov_ctl & PCIM_SRIOV_VF_EN),
+	    check_enabled(iov_ctl & PCIM_SRIOV_VF_MSE),
+	    check_enabled(iov_ctl & PCIM_SRIOV_ARI_EN));
+
+	total_vfs = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_TOTAL_VFS, 2);
+	num_vfs = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_NUM_VFS, 2);
+	printf(" ");
+	printf("%d VFs configured out of %d supported\n", num_vfs, total_vfs);
+
+	vf_offset = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_VF_OFF, 2);
+	vf_stride = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_VF_STRIDE, 2);
+	printf(" ");
+	printf("First VF RID Offset 0x%04x, VF RID Stride 0x%04x\n", vf_offset,
+	    vf_stride);
+
+	vf_did = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_VF_DID, 2);
+	printf(" VF Device ID 0x%04x\n", vf_did);
+
+	page_caps = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_PAGE_CAP, 4);
+	page_size = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_PAGE_SIZE, 4);
+	printf(" ");
+	printf("Page Sizes: ");
+	comma = "";
+	while (page_caps != 0) {
+		/* Walk the supported sizes from the lowest set bit upward. */
+		page_shift = ffs(page_caps) - 1;
+		page_bit = (uint32_t)1 << page_shift;
+
+		/*
+		 * Test only the bit being printed.  Testing all remaining
+		 * capability bits would tag every supported size at or below
+		 * the selected one as enabled.
+		 */
+		if (page_size & page_bit)
+			enabled = " (enabled)";
+		else
+			enabled = "";
+
+		/*
+		 * page_shift can be as large as 31, so the byte count needs
+		 * more than 32 bits once the base shift is added.
+		 */
+		size = 1ULL << (page_shift + PCI_SRIOV_BASE_PAGE_SHIFT);
+		printf("%s%llu%s", comma, size, enabled);
+		comma = ", ";
+
+		page_caps &= ~page_bit;
+	}
+	printf("\n");
+
+	for (i = 0; i <= PCIR_MAX_BAR_0; i++)
+		print_bar(fd, p, "iov bar ", ptr + PCIR_SRIOV_BAR(i));
}
struct {
@@ -708,7 +772,6 @@
{ PCIZ_ACS, "ACS" },
{ PCIZ_ARI, "ARI" },
{ PCIZ_ATS, "ATS" },
- { PCIZ_SRIOV, "SRIOV" },
{ PCIZ_MULTICAST, "Multicast" },
{ PCIZ_RESIZE_BAR, "Resizable BAR" },
{ PCIZ_DPA, "DPA" },
@@ -747,6 +810,9 @@
case PCIZ_SEC_PCIE:
ecap_sec_pcie(fd, p, ptr, PCI_EXTCAP_VER(ecap));
break;
+ case PCIZ_SRIOV:
+ ecap_sriov(fd, p, ptr, PCI_EXTCAP_VER(ecap));
+ break;
default:
name = "unknown";
for (i = 0; ecap_names[i].name != NULL; i++)
@@ -754,10 +820,9 @@
name = ecap_names[i].name;
break;
}
- printf("%s %d", name, PCI_EXTCAP_VER(ecap));
+ printf("%s %d\n", name, PCI_EXTCAP_VER(ecap));
break;
}
- printf("\n");
ptr = PCI_EXTCAP_NEXTPTR(ecap);
if (ptr == 0)
break;
Index: usr.sbin/pciconf/pciconf.h
===================================================================
--- usr.sbin/pciconf/pciconf.h
+++ usr.sbin/pciconf/pciconf.h
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: head/usr.sbin/pciconf/pciconf.h 279466 2015-03-01 00:59:35Z rstone $
*/
#ifndef __PCICONF_H__
@@ -37,6 +37,7 @@
void list_errors(int fd, struct pci_conf *p);
uint8_t pci_find_cap(int fd, struct pci_conf *p, uint8_t id);
uint16_t pcie_find_cap(int fd, struct pci_conf *p, uint16_t id);
+void print_bar(int fd, struct pci_conf *p, const char *label, uint16_t bar);
uint32_t read_config(int fd, struct pcisel *sel, long reg, int width);
#endif
Index: usr.sbin/pciconf/pciconf.c
===================================================================
--- usr.sbin/pciconf/pciconf.c
+++ usr.sbin/pciconf/pciconf.c
@@ -29,7 +29,7 @@
#ifndef lint
static const char rcsid[] =
- "$FreeBSD$";
+ "$FreeBSD: head/usr.sbin/pciconf/pciconf.c 287522 2015-09-06 20:05:29Z bapt $";
#endif /* not lint */
#include <sys/types.h>
@@ -234,9 +234,9 @@
for (p = conf; p < &conf[pc.num_matches]; p++) {
printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
- (p->pd_name && *p->pd_name) ? p->pd_name :
+ *p->pd_name ? p->pd_name :
"none",
- (p->pd_name && *p->pd_name) ? (int)p->pd_unit :
+ *p->pd_name ? (int)p->pd_unit :
none_count++, p->pc_sel.pc_domain,
p->pc_sel.pc_bus, p->pc_sel.pc_dev,
p->pc_sel.pc_func, (p->pc_class << 16) |
@@ -263,10 +263,7 @@
static void
list_bars(int fd, struct pci_conf *p)
{
- struct pci_bar_io bar;
- uint64_t base;
- const char *type;
- int i, range, max;
+ int i, max;
switch (p->pc_hdr & PCIM_HDRTYPE) {
case PCIM_HDRTYPE_NORMAL:
@@ -282,40 +279,50 @@
return;
}
- for (i = 0; i <= max; i++) {
- bar.pbi_sel = p->pc_sel;
- bar.pbi_reg = PCIR_BAR(i);
- if (ioctl(fd, PCIOCGETBAR, &bar) < 0)
- continue;
- if (PCI_BAR_IO(bar.pbi_base)) {
- type = "I/O Port";
+ for (i = 0; i <= max; i++)
+ print_bar(fd, p, "bar ", PCIR_BAR(i));
+}
+
+/*
+ * Describe one base address register of the device selected by p.
+ *
+ * bar_offset is the register offset handed to the PCIOCGETBAR ioctl on fd.
+ * When the ioctl fails nothing is printed.  Otherwise a single line is
+ * written, prefixed with label and the offset, giving the BAR type (I/O
+ * port, memory or prefetchable memory), the address range width in bits
+ * (-1 if the memory type is not recognised), the base address with the low
+ * flag bits masked off, the size, and whether the BAR is enabled.
+ */
+void
+print_bar(int fd, struct pci_conf *p, const char *label, uint16_t bar_offset)
+{
+ uint64_t base;
+ const char *type;
+ struct pci_bar_io bar;
+ int range;
+
+ bar.pbi_sel = p->pc_sel;
+ bar.pbi_reg = bar_offset;
+ if (ioctl(fd, PCIOCGETBAR, &bar) < 0)
+ return;
+ if (PCI_BAR_IO(bar.pbi_base)) {
+ type = "I/O Port";
+ range = 32;
+ base = bar.pbi_base & PCIM_BAR_IO_BASE;
+ } else {
+ if (bar.pbi_base & PCIM_BAR_MEM_PREFETCH)
+ type = "Prefetchable Memory";
+ else
+ type = "Memory";
+ switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
+ case PCIM_BAR_MEM_32:
range = 32;
- base = bar.pbi_base & PCIM_BAR_IO_BASE;
- } else {
- if (bar.pbi_base & PCIM_BAR_MEM_PREFETCH)
- type = "Prefetchable Memory";
- else
- type = "Memory";
- switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
- case PCIM_BAR_MEM_32:
- range = 32;
- break;
- case PCIM_BAR_MEM_1MB:
- range = 20;
- break;
- case PCIM_BAR_MEM_64:
- range = 64;
- break;
- default:
- range = -1;
- }
- base = bar.pbi_base & ~((uint64_t)0xf);
+ break;
+ case PCIM_BAR_MEM_1MB:
+ range = 20;
+ break;
+ case PCIM_BAR_MEM_64:
+ range = 64;
+ break;
+ default:
+ range = -1;
}
- printf(" bar [%02x] = type %s, range %2d, base %#jx, ",
- PCIR_BAR(i), type, range, (uintmax_t)base);
- printf("size %ju, %s\n", (uintmax_t)bar.pbi_length,
- bar.pbi_enabled ? "enabled" : "disabled");
+ base = bar.pbi_base & ~((uint64_t)0xf);
}
+ printf(" %s[%02x] = type %s, range %2d, base %#jx, ",
+ label, bar_offset, type, range, (uintmax_t)base);
+ printf("size %ju, %s\n", (uintmax_t)bar.pbi_length,
+ bar.pbi_enabled ? "enabled" : "disabled");
}
static void

File Metadata

Mime Type
text/plain
Expires
Sun, Mar 8, 1:35 AM (8 h, 57 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29389850
Default Alt Text
D4232.diff (168 KB)

Event Timeline