Index: etc/defaults/rc.conf =================================================================== --- etc/defaults/rc.conf +++ etc/defaults/rc.conf @@ -682,6 +682,8 @@ rctl_enable="NO" # Load rctl(8) rules on boot rctl_rules="/etc/rctl.conf" # rctl(8) ruleset. See rctl.conf(5). +iovctl_files="" # Config files for iovctl(8) + ############################################################## ### Jail Configuration (see rc.conf(5) manual page) ########## ############################################################## Index: etc/rc.d/Makefile =================================================================== --- etc/rc.d/Makefile +++ etc/rc.d/Makefile @@ -44,6 +44,7 @@ hostid_save \ hostname \ initrandom \ + iovctl \ ip6addrctl \ ipfilter \ ipfs \ Index: etc/rc.d/iovctl =================================================================== --- /dev/null +++ etc/rc.d/iovctl @@ -0,0 +1,39 @@ +#!/bin/sh +# +# $FreeBSD: head/etc/rc.d/iovctl 284891 2015-06-27 18:01:50Z pkelsey $ +# + +# PROVIDE: iovctl +# REQUIRE: FILESYSTEMS sysctl + +. /etc/rc.subr + +name="iovctl" +command="/usr/sbin/iovctl" +start_cmd="iovctl_start" +stop_cmd="iovctl_stop" + +run_iovctl() +{ + local _f flag + + flag=$1 + for _f in ${iovctl_files} ; do + if [ -r ${_f} ]; then + ${command} ${flag} -f ${_f} > /dev/null + fi + done +} + +iovctl_start() +{ + run_iovctl -C +} + +iovctl_stop() +{ + run_iovctl -D +} + +load_rc_config $name +run_rc_command "$1" Index: etc/rc.d/netif =================================================================== --- etc/rc.d/netif +++ etc/rc.d/netif @@ -26,7 +26,7 @@ # # PROVIDE: netif -# REQUIRE: atm1 FILESYSTEMS serial sppp sysctl +# REQUIRE: atm1 FILESYSTEMS iovctl serial sppp sysctl # REQUIRE: ipfilter ipfs # KEYWORD: nojailvnet Index: share/man/man5/rc.conf.5 =================================================================== --- share/man/man5/rc.conf.5 +++ share/man/man5/rc.conf.5 @@ -4524,6 +4524,11 @@ .Xr rctl.conf 5 ruleset to load for .Xr rctl 8 . 
+.It Va iovctl_files +.Pq Vt str +A space-separated list of configuration files used by +.Xr iovctl 8 . +The default value is an empty string. .El .Sh FILES .Bl -tag -width ".Pa /etc/defaults/rc.conf" -compact @@ -4577,6 +4582,7 @@ .Xr hcsecd 8 , .Xr ifconfig 8 , .Xr inetd 8 , +.Xr iovctl 8 , .Xr ipf 8 , .Xr ipfw 8 , .Xr ipnat 8 , Index: share/man/man9/Makefile =================================================================== --- share/man/man9/Makefile +++ share/man/man9/Makefile @@ -194,6 +194,10 @@ p_candebug.9 \ p_cansee.9 \ pci.9 \ + PCI_IOV_ADD_VF.9 \ + PCI_IOV_INIT.9 \ + pci_iov_schema.9 \ + PCI_IOV_UNINIT.9 \ pfil.9 \ pfind.9 \ pget.9 \ Index: share/man/man9/PCI_IOV_ADD_VF.9 =================================================================== --- /dev/null +++ share/man/man9/PCI_IOV_ADD_VF.9 @@ -0,0 +1,112 @@ +.\" +.\" Copyright (c) 2014 Sandvine Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: head/share/man/man9/PCI_IOV_ADD_VF.9 286663 2015-08-12 11:56:19Z brueffer $ +.\" +.Dd May 28, 2015 +.Dt PCI_IOV_ADD_VF 9 +.Os +.Sh NAME +.Nm PCI_IOV_ADD_VF +.Nd inform a PF driver that a VF is being created +.Sh SYNOPSIS +.In sys/bus.h +.In machine/stdarg.h +.In sys/nv.h +.In dev/pci/pci_iov.h +.Ft int +.Fn PCI_IOV_ADD_VF "device_t dev" "uint16_t vfnum" "const nvlist_t *vf_config" +.Sh DESCRIPTION +The +.Fn PCI_IOV_ADD_VF +method is called by the PCI Single-Root I/O Virtualization +.Pq SR-IOV +infrastructure when it is initializing a new Virtual Function (VF) as a child +of the given Physical Function (PF) device. +This method will not be called until a successful call to +.Xr PCI_IOV_INIT 9 +has been made. +It is not guaranteed that this method will be called following a successful call +to +.Xr PCI_IOV_INIT 9 . +If the infrastructure encounters a failure to allocate resources following the +call to +.Xr PCI_IOV_INIT 9 , +the VF creation will be aborted and +.Xr PCI_IOV_UNINIT 9 +will be called immediately without any preceding calls to +.Nm . +.Pp +The index of the VF being initialized is passed in the +.Fa vfnum +argument. +VFs are always numbered sequentially starting at 0. +.Pp +If the driver requested device-specific configuration parameters via a VF schema +in its call to +.Xr pci_iov_attach 9 , +those parameters will be contained in the +.Fa vf_config +argument.
+All configuration parameters that were either set as required parameters or that +had a default value set in the VF schema are guaranteed to be present in +.Fa vf_config . +Configuration parameters that were neither set as required nor were given a +default value are optional and may or may not be present in +.Fa vf_config . +.Fa vf_config +will not contain any configuration parameters that were not specified in the VF +schema. +All configuration parameters will have the correct type and will be in the range +of valid values specified in the schema. +.Pp +Note that it is possible for the user to set different configuration values on +different VF devices that are children of the same PF. +The PF driver must not cache configuration parameters passed in previous calls +to +.Fn PCI_IOV_ADD_VF +for other VFs and apply those parameters to the current VF. +.Pp +This function will not be called twice for the same +.Fa vfnum +on the same PF device without +.Xr PCI_IOV_UNINIT 9 +and +.Xr PCI_IOV_INIT 9 +first being called, in that order. +.Sh RETURN VALUES +This method returns 0 on success, otherwise an appropriate error is returned. +If this method returns an error then the current VF device will be destroyed +but the rest of the VF devices will be created and SR-IOV will be enabled on +the PF. +.Sh SEE ALSO +.Xr nv 9 , +.Xr pci 9 , +.Xr PCI_IOV_INIT 9 , +.Xr pci_iov_schema 9 , +.Xr PCI_IOV_UNINIT 9 +.Sh AUTHORS +This manual page was written by +.An Ryan Stone Aq Mt rstone@FreeBSD.org . Index: share/man/man9/PCI_IOV_INIT.9 =================================================================== --- /dev/null +++ share/man/man9/PCI_IOV_INIT.9 @@ -0,0 +1,85 @@ +.\" +.\" Copyright (c) 2014 Sandvine Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1.
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: head/share/man/man9/PCI_IOV_INIT.9 286663 2015-08-12 11:56:19Z brueffer $ +.\" +.Dd May 28, 2015 +.Dt PCI_IOV_INIT 9 +.Os +.Sh NAME +.Nm PCI_IOV_INIT +.Nd enable SR-IOV on a PF device +.Sh SYNOPSIS +.In sys/bus.h +.In machine/stdarg.h +.In sys/nv.h +.In dev/pci/pci_iov.h +.Ft int +.Fn PCI_IOV_INIT "device_t dev" "uint16_t num_vfs" "const nvlist_t *pf_config" +.Sh DESCRIPTION +The +.Fn PCI_IOV_INIT +method is called by the PCI Single-Root I/O Virtualization (SR-IOV) +infrastructure when the user requests that SR-IOV be enabled on a Physical +Function (PF). +The number of Virtual Functions (VFs) that will be created is passed to this +method in the +.Fa num_vfs +argument.
+.Pp +If the driver requested device-specific PF configuration parameters via a PF +schema in its call to +.Xr pci_iov_attach 9 , +those parameters will be available in the +.Fa pf_config +argument. +All configuration parameters that were either set as required parameters or that +had a default value set in the PF schema are guaranteed to be present in +.Fa pf_config . +Configuration parameters that were neither set as required nor were given a +default value are optional and may or may not be present in +.Fa pf_config . +.Fa pf_config +will not contain any configuration parameters that were not specified in the PF +schema. +All configuration parameters will have the correct type and are in the range of +valid values specified in the schema. +.Pp +If this method returns successfully, then this method will not be called again +on the same device until after a call to +.Xr PCI_IOV_UNINIT 9 . +.Sh RETURN VALUES +Returns 0 on success, otherwise an appropriate error is returned. +If this method returns an error then the SR-IOV configuration will be aborted +and no VFs will be created. +.Sh SEE ALSO +.Xr nv 9 , +.Xr pci 9 , +.Xr PCI_IOV_ADD_VF 9 , +.Xr pci_iov_schema 9 , +.Xr PCI_IOV_UNINIT 9 +.Sh AUTHORS +This manual page was written by +.An Ryan Stone Aq Mt rstone@FreeBSD.org . Index: share/man/man9/PCI_IOV_UNINIT.9 =================================================================== --- /dev/null +++ share/man/man9/PCI_IOV_UNINIT.9 @@ -0,0 +1,63 @@ +.\" +.\" Copyright (c) 2014 Sandvine Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2.
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: head/share/man/man9/PCI_IOV_UNINIT.9 283670 2015-05-28 22:01:50Z jhb $ +.\" +.Dd May 28, 2015 +.Dt PCI_IOV_UNINIT 9 +.Os +.Sh NAME +.Nm PCI_IOV_UNINIT +.Nd disable SR-IOV on a PF device +.Sh SYNOPSIS +.In sys/bus.h +.In dev/pci/pci_iov.h +.Ft void +.Fn PCI_IOV_UNINIT "device_t dev" +.Sh DESCRIPTION +The +.Fn PCI_IOV_UNINIT +method is called by the PCI Single-Root I/O Virtualization (SR-IOV) +infrastructure when the user requests that SR-IOV be disabled on a Physical +Function (PF). +When this method is called, the PF driver must release any SR-IOV-related +resources that it has allocated and disable any device-specific SR-IOV +configuration in the device. +.Pp +This method will only be called following a successful call to +.Xr PCI_IOV_INIT 9 . +It is not guaranteed that +.Xr PCI_IOV_ADD_VF 9 +will have been called for any Virtual Function (VF) after the call to +.Xr PCI_IOV_INIT 9 +and before the call to +.Nm .
+.Sh SEE ALSO +.Xr pci 9 , +.Xr PCI_IOV_ADD_VF 9 , +.Xr PCI_IOV_INIT 9 +.Sh AUTHORS +This manual page was written by +.An Ryan Stone Aq Mt rstone@FreeBSD.org . Index: share/man/man9/pci_iov_schema.9 =================================================================== --- /dev/null +++ share/man/man9/pci_iov_schema.9 @@ -0,0 +1,265 @@ +.\" +.\" Copyright (c) 2014 Sandvine Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: head/share/man/man9/pci_iov_schema.9 285273 2015-07-08 16:16:44Z pkelsey $ +.\" +.Dd July 8, 2015 +.Dt PCI_IOV_SCHEMA 9 +.Os +.Sh NAME +.Nm pci_iov_schema , +.Nm pci_iov_schema_alloc_node , +.Nm pci_iov_schema_add_bool , +.Nm pci_iov_schema_add_string , +.Nm pci_iov_schema_add_uint8 , +.Nm pci_iov_schema_add_uint16 , +.Nm pci_iov_schema_add_uint32 , +.Nm pci_iov_schema_add_uint64 , +.Nm pci_iov_schema_add_unicast_mac +.Nd PCI SR-IOV config schema interface +.Sh SYNOPSIS +.In machine/stdarg.h +.In sys/nv.h +.In sys/iov_schema.h +.Ft nvlist_t * +.Fn pci_iov_schema_alloc_node "void" +.Ft void +.Fn pci_iov_schema_add_bool "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "int defaultVal" +.Ft void +.Fn pci_iov_schema_add_string "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "const char *defaultVal" +.Ft void +.Fn pci_iov_schema_add_uint8 "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "uint8_t defaultVal" +.Ft void +.Fn pci_iov_schema_add_uint16 "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "uint16_t defaultVal" +.Ft void +.Fn pci_iov_schema_add_uint32 "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "uint32_t defaultVal" +.Ft void +.Fn pci_iov_schema_add_uint64 "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "uint64_t defaultVal" +.Ft void +.Fn pci_iov_schema_add_unicast_mac "nvlist_t *schema" "const char *name" \ +"uint32_t flags" "const uint8_t *defaultVal" +.Sh DESCRIPTION +The PCI Single-Root I/O Virtualization +.Pq SR-IOV +configuration schema is a data +structure that describes the device-specific configuration parameters that a PF +driver will accept when SR-IOV is enabled on the PF device. +Each PF driver defines two schema instances: the PF schema and the VF schema. +The PF schema describes configuration that applies to the PF device as a whole. +The VF schema describes configuration that applies to an individual VF device. 
+Different VF devices may have different configuration applied to them, as long +as the configuration for each VF conforms to the VF schema. +.Pp +A PF driver builds a configuration schema by first allocating a schema node and +then adding configuration parameter specifications to the schema. +The configuration parameter specification consists of a name and a value type. +.Pp +Configuration parameter names are case-insensitive. +It is an error to specify two or more configuration parameters with the same +name. +It is also an error to specify a configuration parameter that uses the same +name as a configuration parameter used by the SR-IOV infrastructure. +See +.Xr iovctl.conf 5 +for documentation of all configuration parameters used by the SR-IOV +infrastructure. +.Pp +The parameter type constrains the possible values that the configuration +parameter may take. +.Pp +A configuration parameter may be specified as a required parameter by setting +the +.Dv IOV_SCHEMA_REQUIRED +flag in the +.Pa flags +argument. +Required parameters must be specified by the user when SR-IOV is enabled. +If the user does not specify a required parameter, the SR-IOV infrastructure +will abort the request to enable SR-IOV and return an error to the user. +.Pp +Alternatively, a configuration parameter may be given a default value by +setting the +.Dv IOV_SCHEMA_HASDEFAULT +flag in the +.Pa flags +argument. +If a configuration parameter has a default value but the user has not specified +a value for that parameter, then the SR-IOV infrastructure will apply +.Pa defaultVal +for that parameter in the configuration before passing it to the PF driver. +It is an error for the value of the +.Pa defaultVal +parameter to not conform to the restrictions of the specified type. +If this flag is not specified then the +.Pa defaultVal +argument is ignored. +This flag is not compatible with the +.Dv IOV_SCHEMA_REQUIRED +flag; it is an error to specify both on the same parameter.
+.Pp +The SR-IOV infrastructure guarantees that all configuration parameters that are +either specified as required or given a default value will be present in the +configuration passed to the PF driver. +Configuration parameters that are neither specified as required nor given a +default value are optional and may or may not be present in the configuration +passed to the PF driver. +.Pp +It is highly recommended that a PF driver reserve the use of optional parameters +for configuration that is truly optional. +For example, a Network Interface PF device might have the option to encapsulate +all traffic to and from a VF device in a vlan tag. +The PF driver could expose that option as a "vlan" parameter accepting an +integer argument specifying the vlan tag. +In this case, it would be appropriate to set the "vlan" parameter as an optional +parameter as it would be legitimate for a VF to be configured to have no vlan +tagging enabled at all. +.Pp +Alternatively, if the PF device had a boolean option that controlled whether +the VF was allowed to change its MAC address, it would not be appropriate to +set this parameter as optional. +The PF driver must either allow the MAC to change or not, so it would be more +appropriate for the PF driver to document the default behaviour by specifying +a default value in the schema +.Po or potentially force the user to make the choice by setting the parameter +to be required +.Pc . +.Pp +Configuration parameters that have security implications must default to the +most secure configuration possible. +.Pp +All device-specific configuration parameters must be documented in the manual +page for the PF driver, or in a separate manual page that is cross-referenced +from the main driver manual page. +.Pp +It is not necessary for a PF driver to check for failure from any of these +functions. +If an error occurs, it is flagged in the schema.
+The +.Xr pci_iov_attach 9 +function checks for this error and will fail to initialize SR-IOV on the PF +device if an error is set in the schema. +If this occurs, it is recommended that the PF driver still succeed in attaching +and run with SR-IOV disabled on the device. +.Pp +The +.Fn pci_iov_schema_alloc_node +function is used to allocate an empty configuration schema. +It is not necessary to check for failure from this function. +The SR-IOV infrastructure will gracefully handle failure to allocate a schema +and will simply not enable SR-IOV on the PF device. +.Pp +The +.Fn pci_iov_schema_add_bool +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a boolean type. +Boolean values can only take the value true or false (1 or 0, respectively). +.Pp +The +.Fn pci_iov_schema_add_string +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a string type. +String values are standard C strings. +.Pp +The +.Fn pci_iov_schema_add_uint8 +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a +.Vt uint8_t +type. +Values of type +.Vt uint8_t +are unsigned integers in the range 0 to 255, inclusive. +.Pp +The +.Fn pci_iov_schema_add_uint16 +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a +.Vt uint16_t +type. +Values of type +.Vt uint16_t +are unsigned integers in the range 0 to 65535, inclusive. +.Pp +The +.Fn pci_iov_schema_add_uint32 +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a +.Vt uint32_t +type. +Values of type +.Vt uint32_t +are unsigned integers in the range 0 to +.Pq 2**32 - 1 , +inclusive. +.Pp +The +.Fn pci_iov_schema_add_uint64 +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a +.Vt uint64_t +type. 
+Values of type +.Vt uint64_t +are unsigned integers in the range 0 to +.Pq 2**64 - 1 , +inclusive. +.Pp +The +.Fn pci_iov_schema_add_unicast_mac +function is used to specify a configuration parameter in the given schema with +the name +.Pa name +and having a unicast-mac type. +Values of type unicast-mac are binary values exactly 6 bytes long. +The MAC address is guaranteed to not be a multicast or broadcast address. +.Sh RETURN VALUES +The +.Fn pci_iov_schema_alloc_node +function returns a pointer to the allocated schema, or NULL if a failure occurs. +.Sh SEE ALSO +.Xr pci 9 , +.Xr PCI_IOV_ADD_VF 9 , +.Xr PCI_IOV_INIT 9 +.Sh AUTHORS +This manual page was written by +.An Ryan Stone Aq rstone@FreeBSD.org . Index: sys/amd64/conf/GENERIC =================================================================== --- sys/amd64/conf/GENERIC +++ sys/amd64/conf/GENERIC @@ -90,6 +90,8 @@ device acpi options ACPI_DMAR device pci +options PCI_IOV # PCI SR-IOV support + # Floppy drives device fdc Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -2003,6 +2003,9 @@ dev/pci/isa_pci.c optional pci isa dev/pci/pci.c optional pci dev/pci/pci_if.m standard +dev/pci/pci_iov.c optional pci pci_iov +dev/pci/pci_iov_if.m standard +dev/pci/pci_iov_schema.c optional pci pci_iov dev/pci/pci_pci.c optional pci dev/pci/pci_subr.c optional pci dev/pci/pci_user.c optional pci Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -165,6 +165,7 @@ NSWBUF_MIN opt_swap.h MBUF_PACKET_ZONE_DISABLE opt_global.h PANIC_REBOOT_WAIT_TIME opt_panic.h +PCI_IOV opt_global.h PPC_DEBUG opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h PPS_SYNC opt_ntp.h Index: sys/dev/acpica/acpi_pci.c =================================================================== --- sys/dev/acpica/acpi_pci.c +++ sys/dev/acpica/acpi_pci.c @@ -84,6 +84,11 @@ static void 
acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child); static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child); +#ifdef PCI_IOV +static device_t acpi_pci_create_iov_child(device_t bus, device_t pf, + uint16_t rid, uint16_t vid, uint16_t did); +#endif + static device_method_t acpi_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, acpi_pci_probe), @@ -98,6 +103,9 @@ /* PCI interface */ DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method), +#ifdef PCI_IOV + DEVMETHOD(pci_create_iov_child, acpi_pci_create_iov_child), +#endif DEVMETHOD_END }; @@ -345,3 +353,23 @@ return (pci_get_dma_tag(bus, child)); } #endif + +#ifdef PCI_IOV +static device_t +acpi_pci_create_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid, + uint16_t did) +{ + struct acpi_pci_devinfo *dinfo; + device_t vf; + + vf = pci_add_iov_child(bus, pf, sizeof(struct acpi_pci_devinfo), rid, + vid, did); + if (vf == NULL) + return (NULL); + + dinfo = device_get_ivars(vf); + dinfo->ap_handle = NULL; + return (vf); +} +#endif + Index: sys/dev/pci/pci.c =================================================================== --- sys/dev/pci/pci.c +++ sys/dev/pci/pci.c @@ -77,7 +77,6 @@ static int pci_has_quirk(uint32_t devid, int quirk); static pci_addr_t pci_mapbase(uint64_t mapreg); static const char *pci_maptype(uint64_t mapreg); -static int pci_mapsize(uint64_t testval); static int pci_maprange(uint64_t mapreg); static pci_addr_t pci_rombase(uint64_t mapreg); static int pci_romsize(uint64_t testval); @@ -126,6 +125,9 @@ static uint16_t pci_get_rid_method(device_t dev, device_t child); +static struct pci_devinfo * pci_fill_devinfo(device_t pcib, int d, int b, int s, + int f, uint16_t vid, uint16_t did, size_t size); + static device_method_t pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pci_probe), @@ -185,6 +187,11 @@ DEVMETHOD(pci_msi_count, pci_msi_count_method), DEVMETHOD(pci_msix_count, pci_msix_count_method), DEVMETHOD(pci_get_rid, 
pci_get_rid_method), +#ifdef PCI_IOV + DEVMETHOD(pci_iov_attach, pci_iov_attach_method), + DEVMETHOD(pci_iov_detach, pci_iov_detach_method), + DEVMETHOD(pci_create_iov_child, pci_create_iov_child_method), +#endif DEVMETHOD_END }; @@ -493,7 +500,7 @@ /* return log2 of map size decoded for memory or port map */ -static int +int pci_mapsize(uint64_t testval) { int ln2size; @@ -537,7 +544,7 @@ } return (ln2size); } - + /* return log2 of address range supported by map register */ static int @@ -606,73 +613,81 @@ pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size) { #define REG(n, w) PCIB_READ_CONFIG(pcib, b, s, f, n, w) - pcicfgregs *cfg = NULL; - struct pci_devinfo *devlist_entry; - struct devlist *devlist_head; + uint16_t vid, did; - devlist_head = &pci_devq; + vid = REG(PCIR_VENDOR, 2); + did = REG(PCIR_DEVICE, 2); + if (vid != 0xffff) + return (pci_fill_devinfo(pcib, d, b, s, f, vid, did, size)); + + return (NULL); +} - devlist_entry = NULL; +static struct pci_devinfo * +pci_fill_devinfo(device_t pcib, int d, int b, int s, int f, uint16_t vid, + uint16_t did, size_t size) +{ + struct pci_devinfo *devlist_entry; + pcicfgregs *cfg; - if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) { - devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); - if (devlist_entry == NULL) - return (NULL); + devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); + cfg = &devlist_entry->cfg; + + cfg->domain = d; + cfg->bus = b; + cfg->slot = s; + cfg->func = f; + cfg->vendor = vid; + cfg->device = did; + cfg->cmdreg = REG(PCIR_COMMAND, 2); + cfg->statreg = REG(PCIR_STATUS, 2); + cfg->baseclass = REG(PCIR_CLASS, 1); + cfg->subclass = REG(PCIR_SUBCLASS, 1); + cfg->progif = REG(PCIR_PROGIF, 1); + cfg->revid = REG(PCIR_REVID, 1); + cfg->hdrtype = REG(PCIR_HDRTYPE, 1); + cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1); + cfg->lattimer = REG(PCIR_LATTIMER, 1); + cfg->intpin = REG(PCIR_INTPIN, 1); + cfg->intline = REG(PCIR_INTLINE, 1); + + cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 
0; + cfg->hdrtype &= ~PCIM_MFDEV; + STAILQ_INIT(&cfg->maps); + + cfg->devinfo_size = size; + cfg->iov = NULL; + + pci_fixancient(cfg); + pci_hdrtypedata(pcib, b, s, f, cfg); + + if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) + pci_read_cap(pcib, cfg); + + STAILQ_INSERT_TAIL(&pci_devq, devlist_entry, pci_links); + + devlist_entry->conf.pc_sel.pc_domain = cfg->domain; + devlist_entry->conf.pc_sel.pc_bus = cfg->bus; + devlist_entry->conf.pc_sel.pc_dev = cfg->slot; + devlist_entry->conf.pc_sel.pc_func = cfg->func; + devlist_entry->conf.pc_hdr = cfg->hdrtype; + + devlist_entry->conf.pc_subvendor = cfg->subvendor; + devlist_entry->conf.pc_subdevice = cfg->subdevice; + devlist_entry->conf.pc_vendor = cfg->vendor; + devlist_entry->conf.pc_device = cfg->device; + + devlist_entry->conf.pc_class = cfg->baseclass; + devlist_entry->conf.pc_subclass = cfg->subclass; + devlist_entry->conf.pc_progif = cfg->progif; + devlist_entry->conf.pc_revid = cfg->revid; + + pci_numdevs++; + pci_generation++; - cfg = &devlist_entry->cfg; - - cfg->domain = d; - cfg->bus = b; - cfg->slot = s; - cfg->func = f; - cfg->vendor = REG(PCIR_VENDOR, 2); - cfg->device = REG(PCIR_DEVICE, 2); - cfg->cmdreg = REG(PCIR_COMMAND, 2); - cfg->statreg = REG(PCIR_STATUS, 2); - cfg->baseclass = REG(PCIR_CLASS, 1); - cfg->subclass = REG(PCIR_SUBCLASS, 1); - cfg->progif = REG(PCIR_PROGIF, 1); - cfg->revid = REG(PCIR_REVID, 1); - cfg->hdrtype = REG(PCIR_HDRTYPE, 1); - cfg->cachelnsz = REG(PCIR_CACHELNSZ, 1); - cfg->lattimer = REG(PCIR_LATTIMER, 1); - cfg->intpin = REG(PCIR_INTPIN, 1); - cfg->intline = REG(PCIR_INTLINE, 1); - - cfg->mfdev = (cfg->hdrtype & PCIM_MFDEV) != 0; - cfg->hdrtype &= ~PCIM_MFDEV; - STAILQ_INIT(&cfg->maps); - - pci_fixancient(cfg); - pci_hdrtypedata(pcib, b, s, f, cfg); - - if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) - pci_read_cap(pcib, cfg); - - STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links); - - devlist_entry->conf.pc_sel.pc_domain = cfg->domain; - 
devlist_entry->conf.pc_sel.pc_bus = cfg->bus; - devlist_entry->conf.pc_sel.pc_dev = cfg->slot; - devlist_entry->conf.pc_sel.pc_func = cfg->func; - devlist_entry->conf.pc_hdr = cfg->hdrtype; - - devlist_entry->conf.pc_subvendor = cfg->subvendor; - devlist_entry->conf.pc_subdevice = cfg->subdevice; - devlist_entry->conf.pc_vendor = cfg->vendor; - devlist_entry->conf.pc_device = cfg->device; - - devlist_entry->conf.pc_class = cfg->baseclass; - devlist_entry->conf.pc_subclass = cfg->subclass; - devlist_entry->conf.pc_progif = cfg->progif; - devlist_entry->conf.pc_revid = cfg->revid; - - pci_numdevs++; - pci_generation++; - } return (devlist_entry); -#undef REG } +#undef REG static void pci_read_cap(device_t pcib, pcicfgregs *cfg) @@ -2673,8 +2688,9 @@ return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0; } -static void -pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp) +void +pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp, + int *bar64) { struct pci_devinfo *dinfo; pci_addr_t map, testval; @@ -2694,6 +2710,8 @@ pci_write_config(dev, reg, map, 4); *mapp = map; *testvalp = testval; + if (bar64 != NULL) + *bar64 = 0; return; } @@ -2735,6 +2753,8 @@ *mapp = map; *testvalp = testval; + if (bar64 != NULL) + *bar64 = (ln2range == 64); } static void @@ -2789,7 +2809,7 @@ return ((cmd & PCIM_CMD_PORTEN) != 0); } -static struct pci_map * +struct pci_map * pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size) { struct pci_devinfo *dinfo; @@ -2860,7 +2880,7 @@ return (barlen); } - pci_read_bar(dev, reg, &map, &testval); + pci_read_bar(dev, reg, &map, &testval, NULL); if (PCI_BAR_MEM(map)) { type = SYS_RES_MEMORY; if (map & PCIM_BAR_MEM_PREFETCH) @@ -3594,6 +3614,51 @@ #undef REG } +#ifdef PCI_IOV +device_t +pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid, + uint16_t vid, uint16_t did) +{ + struct pci_devinfo *pf_dinfo, *vf_dinfo; + device_t pcib; + int busno, slot, func; + 
+ pf_dinfo = device_get_ivars(pf); + + /* + * Do a sanity check that we have been passed the correct size. If this + * test fails then likely the pci subclass hasn't implemented the + * pci_create_iov_child method like it's supposed to. + */ + if (size != pf_dinfo->cfg.devinfo_size) { + device_printf(pf, + "PCI subclass does not properly implement PCI_IOV\n"); + return (NULL); + } + + pcib = device_get_parent(bus); + + PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func); + + vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func, + vid, did, size); + + vf_dinfo->cfg.flags |= PCICFG_VF; + pci_add_child(bus, vf_dinfo); + + return (vf_dinfo->cfg.dev); +} + +device_t +pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid, + uint16_t vid, uint16_t did) +{ + + return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid, + did)); +} +#endif + void pci_add_child(device_t bus, struct pci_devinfo *dinfo) { @@ -4523,7 +4588,7 @@ static struct resource * pci_reserve_map(device_t dev, device_t child, int type, int *rid, - u_long start, u_long end, u_long count, u_int flags) + u_long start, u_long end, u_long count, u_int num, u_int flags) { struct pci_devinfo *dinfo = device_get_ivars(child); struct resource_list *rl = &dinfo->resources; @@ -4547,7 +4612,7 @@ * have a atapci device in legacy mode and it fails * here, that other code is broken.
*/ - pci_read_bar(child, *rid, &map, &testval); + pci_read_bar(child, *rid, &map, &testval, NULL); /* * Determine the size of the BAR and ignore BARs with a size @@ -4620,8 +4685,8 @@ } struct resource * -pci_alloc_resource(device_t dev, device_t child, int type, int *rid, - u_long start, u_long end, u_long count, u_int flags) +pci_alloc_multi_resource(device_t dev, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_long num, u_int flags) { struct pci_devinfo *dinfo; struct resource_list *rl; @@ -4689,7 +4754,7 @@ rle = resource_list_find(rl, type, *rid); if (rle == NULL) { res = pci_reserve_map(dev, child, type, rid, start, end, - count, flags); + count, num, flags); if (res == NULL) return (NULL); } @@ -4698,6 +4763,38 @@ start, end, count, flags)); } +struct resource * +pci_alloc_resource(device_t dev, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_int flags) +{ +#ifdef PCI_IOV + struct pci_devinfo *dinfo; +#endif + + if (device_get_parent(child) != dev) + return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child, + type, rid, start, end, count, flags)); + +#ifdef PCI_IOV + dinfo = device_get_ivars(child); + if (dinfo->cfg.flags & PCICFG_VF) { + switch (type) { + /* VFs can't have I/O BARs. */ + case SYS_RES_IOPORT: + return (NULL); + case SYS_RES_MEMORY: + return (pci_vf_alloc_mem_resource(dev, child, rid, + start, end, count, flags)); + } + + /* Fall through for other types of resource allocations. */ + } +#endif + + return (pci_alloc_multi_resource(dev, child, type, rid, start, end, + count, 1, flags)); +} + int pci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) @@ -4712,6 +4809,22 @@ dinfo = device_get_ivars(child); cfg = &dinfo->cfg; + +#ifdef PCI_IOV + if (dinfo->cfg.flags & PCICFG_VF) { + switch (type) { + /* VFs can't have I/O BARs. 
*/ + case SYS_RES_IOPORT: + return (EDOOFUS); + case SYS_RES_MEMORY: + return (pci_vf_release_mem_resource(dev, child, rid, + r)); + } + + /* Fall through for other types of resource allocations. */ + } +#endif + #ifdef NEW_PCIB /* * PCI-PCI bridge I/O window resources are not BARs. For @@ -4874,6 +4987,38 @@ struct pci_devinfo *dinfo = device_get_ivars(child); pcicfgregs *cfg = &dinfo->cfg; +#ifdef PCI_IOV + /* + * SR-IOV VFs don't implement the VID or DID registers, so we have to + * emulate them here. + */ + if (cfg->flags & PCICFG_VF) { + if (reg == PCIR_VENDOR) { + switch (width) { + case 4: + return (cfg->device << 16 | cfg->vendor); + case 2: + return (cfg->vendor); + case 1: + return (cfg->vendor & 0xff); + default: + return (0xffffffff); + } + } else if (reg == PCIR_DEVICE) { + switch (width) { + /* Note that an unaligned 4-byte read is an error. */ + case 2: + return (cfg->device); + case 1: + return (cfg->device & 0xff); + default: + return (0xffffffff); + } + } + } +#endif + + return (PCIB_READ_CONFIG(device_get_parent(dev), cfg->bus, cfg->slot, cfg->func, reg, width)); } Index: sys/dev/pci/pci_if.m =================================================================== --- sys/dev/pci/pci_if.m +++ sys/dev/pci/pci_if.m @@ -36,8 +36,19 @@ { return (0); } + + static device_t + null_create_iov_child(device_t bus, device_t pf, uint16_t rid, + uint16_t vid, uint16_t did) + { + device_printf(bus, "PCI_IOV not implemented on this bus.\n"); + return (NULL); + } }; +HEADER { + struct nvlist; +} METHOD u_int32_t read_config { device_t dev; @@ -165,3 +176,22 @@ device_t child; }; +METHOD int iov_attach { + device_t dev; + device_t child; + struct nvlist *pf_schema; + struct nvlist *vf_schema; +}; + +METHOD int iov_detach { + device_t dev; + device_t child; +}; + +METHOD device_t create_iov_child { + device_t bus; + device_t pf; + uint16_t rid; + uint16_t vid; + uint16_t did; +} DEFAULT null_create_iov_child; Index: sys/dev/pci/pci_iov.h 
=================================================================== --- /dev/null +++ sys/dev/pci/pci_iov.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2013-2015 Sandvine Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: head/sys/dev/pci/pci_iov.h 283670 2015-05-28 22:01:50Z jhb $ + */ + +#ifndef _PCI_IOV_H_ +#define _PCI_IOV_H_ + +#include "pci_iov_if.h" + +struct nvlist; + +static __inline int +pci_iov_attach(device_t dev, struct nvlist *pf_schema, struct nvlist *vf_schema) +{ + return (PCI_IOV_ATTACH(device_get_parent(dev), dev, pf_schema, + vf_schema)); +} + +static __inline int +pci_iov_detach(device_t dev) +{ + return (PCI_IOV_DETACH(device_get_parent(dev), dev)); +} + +#endif /* !_PCI_IOV_H_ */ Index: sys/dev/pci/pci_iov.c =================================================================== --- /dev/null +++ sys/dev/pci/pci_iov.c @@ -0,0 +1,980 @@ +/*- + * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov.c 283670 2015-05-28 22:01:50Z jhb $"); + +#include "opt_bus.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pcib_if.h" + +static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations"); + +static d_ioctl_t pci_iov_ioctl; + +static struct cdevsw iov_cdevsw = { + .d_version = D_VERSION, + .d_name = "iov", + .d_ioctl = pci_iov_ioctl +}; + +SYSCTL_DECL(_hw_pci); + +/* + * The maximum amount of memory we will allocate for user configuration of an + * SR-IOV device. 1MB ought to be enough for anyone, but leave this + * configurable just in case. 
+ */ +static u_long pci_iov_max_config = 1024 * 1024; +SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN, + &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration."); + + +#define IOV_READ(d, r, w) \ + pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w) + +#define IOV_WRITE(d, r, v, w) \ + pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w) + +static nvlist_t *pci_iov_build_schema(nvlist_t **pf_schema, + nvlist_t **vf_schema); +static void pci_iov_build_pf_schema(nvlist_t *schema, + nvlist_t **driver_schema); +static void pci_iov_build_vf_schema(nvlist_t *schema, + nvlist_t **driver_schema); +static nvlist_t *pci_iov_get_pf_subsystem_schema(void); +static nvlist_t *pci_iov_get_vf_subsystem_schema(void); + +int +pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema, + nvlist_t *vf_schema) +{ + device_t pcib; + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + nvlist_t *schema; + uint32_t version; + int error; + int iov_pos; + + dinfo = device_get_ivars(dev); + pcib = device_get_parent(bus); + schema = NULL; + + error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos); + + if (error != 0) + return (error); + + version = pci_read_config(dev, iov_pos, 4); + if (PCI_EXTCAP_VER(version) != 1) { + if (bootverbose) + device_printf(dev, + "Unsupported version of SR-IOV (%d) detected\n", + PCI_EXTCAP_VER(version)); + + return (ENXIO); + } + + iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO); + + mtx_lock(&Giant); + if (dinfo->cfg.iov != NULL) { + error = EBUSY; + goto cleanup; + } + iov->iov_pos = iov_pos; + + schema = pci_iov_build_schema(&pf_schema, &vf_schema); + if (schema == NULL) { + error = ENOMEM; + goto cleanup; + } + + error = pci_iov_validate_schema(schema); + if (error != 0) + goto cleanup; + iov->iov_schema = schema; + + iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev), + UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev)); + + if (iov->iov_cdev == NULL) { + error 
= ENOMEM; + goto cleanup; + } + + dinfo->cfg.iov = iov; + iov->iov_cdev->si_drv1 = dinfo; + mtx_unlock(&Giant); + + return (0); + +cleanup: + nvlist_destroy(schema); + nvlist_destroy(pf_schema); + nvlist_destroy(vf_schema); + free(iov, M_SRIOV); + mtx_unlock(&Giant); + return (error); +} + +int +pci_iov_detach_method(device_t bus, device_t dev) +{ + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + + mtx_lock(&Giant); + dinfo = device_get_ivars(dev); + iov = dinfo->cfg.iov; + + if (iov == NULL) { + mtx_unlock(&Giant); + return (0); + } + + if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) { + mtx_unlock(&Giant); + return (EBUSY); + } + + dinfo->cfg.iov = NULL; + + if (iov->iov_cdev) { + destroy_dev(iov->iov_cdev); + iov->iov_cdev = NULL; + } + nvlist_destroy(iov->iov_schema); + + free(iov, M_SRIOV); + mtx_unlock(&Giant); + + return (0); +} + +static nvlist_t * +pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf) +{ + nvlist_t *schema, *pf_driver, *vf_driver; + + /* We always take ownership of the schemas. */ + pf_driver = *pf; + *pf = NULL; + vf_driver = *vf; + *vf = NULL; + + schema = pci_iov_schema_alloc_node(); + if (schema == NULL) + goto cleanup; + + pci_iov_build_pf_schema(schema, &pf_driver); + pci_iov_build_vf_schema(schema, &vf_driver); + + if (nvlist_error(schema) != 0) + goto cleanup; + + return (schema); + +cleanup: + nvlist_destroy(schema); + nvlist_destroy(pf_driver); + nvlist_destroy(vf_driver); + return (NULL); +} + +static void +pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema) +{ + nvlist_t *pf_schema, *iov_schema; + + pf_schema = pci_iov_schema_alloc_node(); + if (pf_schema == NULL) { + nvlist_set_error(schema, ENOMEM); + return; + } + + iov_schema = pci_iov_get_pf_subsystem_schema(); + + /* + * Note that if either *driver_schema or iov_schema is NULL, then + * nvlist_move_nvlist will put the schema in the error state and + * SR-IOV will fail to initialize later, so we don't have to explicitly + * handle that case. 
+ */ + nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema); + nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema); + nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema); + *driver_schema = NULL; +} + +static void +pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema) +{ + nvlist_t *vf_schema, *iov_schema; + + vf_schema = pci_iov_schema_alloc_node(); + if (vf_schema == NULL) { + nvlist_set_error(schema, ENOMEM); + return; + } + + iov_schema = pci_iov_get_vf_subsystem_schema(); + + /* + * Note that if either *driver_schema or iov_schema is NULL, then + * nvlist_move_nvlist will put the schema in the error state and + * SR-IOV will fail to initialize later, so we don't have to explicitly + * handle that case. + */ + nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema); + nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema); + nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema); + *driver_schema = NULL; +} + +static nvlist_t * +pci_iov_get_pf_subsystem_schema(void) +{ + nvlist_t *pf; + + pf = pci_iov_schema_alloc_node(); + if (pf == NULL) + return (NULL); + + pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1); + pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL); + + return (pf); +} + +static nvlist_t * +pci_iov_get_vf_subsystem_schema(void) +{ + nvlist_t *vf; + + vf = pci_iov_schema_alloc_node(); + if (vf == NULL) + return (NULL); + + pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0); + + return (vf); +} + +static int +pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift) +{ + struct resource *res; + struct pcicfg_iov *iov; + device_t dev, bus; + u_long start, end; + pci_addr_t bar_size; + int rid; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + bus = device_get_parent(dev); + rid = iov->iov_pos + PCIR_SRIOV_BAR(bar); + bar_size = (pci_addr_t)1 << bar_shift; + + res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul, + ~0ul, 1,
iov->iov_num_vfs, RF_ACTIVE); + + if (res == NULL) + return (ENXIO); + + iov->iov_bar[bar].res = res; + iov->iov_bar[bar].bar_size = bar_size; + iov->iov_bar[bar].bar_shift = bar_shift; + + start = rman_get_start(res); + end = rman_get_end(res); + return (rman_manage_region(&iov->rman, start, end)); +} + +static void +pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo) +{ + struct pci_iov_bar *bar; + uint64_t bar_start; + int i; + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + bar = &iov->iov_bar[i]; + if (bar->res != NULL) { + bar_start = rman_get_start(bar->res) + + dinfo->cfg.vf.index * bar->bar_size; + + pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start, + bar->bar_shift); + } + } +} + +static int +pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg, + nvlist_t **ret) +{ + void *packed_config; + nvlist_t *config; + int error; + + config = NULL; + packed_config = NULL; + + if (arg->len > pci_iov_max_config) { + error = EMSGSIZE; + goto out; + } + + packed_config = malloc(arg->len, M_SRIOV, M_WAITOK); + + error = copyin(arg->config, packed_config, arg->len); + if (error != 0) + goto out; + + config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE); + if (config == NULL) { + error = EINVAL; + goto out; + } + + error = pci_iov_schema_validate_config(iov->iov_schema, config); + if (error != 0) + goto out; + + error = nvlist_error(config); + if (error != 0) + goto out; + + *ret = config; + config = NULL; + +out: + nvlist_destroy(config); + free(packed_config, M_SRIOV); + return (error); +} + +/* + * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV + * capability. This bit is only writeable on the lowest-numbered PF but + * affects all PFs on the device. + */ +static int +pci_iov_set_ari(device_t bus) +{ + device_t lowest; + device_t *devlist; + int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func; + uint16_t iov_ctl; + + /* If ARI is disabled on the downstream port there is nothing to do. 
*/ + if (!PCIB_ARI_ENABLED(device_get_parent(bus))) + return (0); + + error = device_get_children(bus, &devlist, &devcount); + + if (error != 0) + return (error); + + lowest = NULL; + for (i = 0; i < devcount; i++) { + if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) { + dev_func = pci_get_function(devlist[i]); + if (lowest == NULL || dev_func < lowest_func) { + lowest = devlist[i]; + lowest_func = dev_func; + lowest_pos = iov_pos; + } + } + } + + /* + * If we called this function some device must have the SR-IOV + * capability. + */ + KASSERT(lowest != NULL, + ("Could not find child of %s with SR-IOV capability", + device_get_nameunit(bus))); + + iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2); + iov_ctl |= PCIM_SRIOV_ARI_EN; + pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2); + free(devlist, M_TEMP); + return (0); +} + +static int +pci_iov_config_page_size(struct pci_devinfo *dinfo) +{ + uint32_t page_cap, page_size; + + page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4); + + /* + * If the system page size is less than the smallest SR-IOV page size + * then round up to the smallest SR-IOV page size. + */ + if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT) + page_size = (1 << 0); + else + page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT)); + + /* Check that the device supports the system page size.
*/ + if (!(page_size & page_cap)) + return (ENXIO); + + IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4); + return (0); +} + +static int +pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config) +{ + const nvlist_t *device, *driver_config; + + device = nvlist_get_nvlist(config, PF_CONFIG_NAME); + driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME); + return (PCI_IOV_INIT(dev, num_vfs, driver_config)); +} + +static int +pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov) +{ + int error; + + iov->rman.rm_start = 0; + iov->rman.rm_end = ~0ul; + iov->rman.rm_type = RMAN_ARRAY; + snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory", + device_get_nameunit(pf)); + iov->rman.rm_descr = iov->rman_name; + + error = rman_init(&iov->rman); + if (error != 0) + return (error); + + iov->iov_flags |= IOV_RMAN_INITED; + return (0); +} + +static int +pci_iov_setup_bars(struct pci_devinfo *dinfo) +{ + device_t dev; + struct pcicfg_iov *iov; + pci_addr_t bar_value, testval; + int i, last_64, error; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + last_64 = 0; + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + /* + * If a PCI BAR is a 64-bit wide BAR, then it spans two + * consecutive registers. Therefore if the last BAR that + * we looked at was a 64-bit BAR, we need to skip this + * register as it's the second half of the last BAR. 
+ */ + if (!last_64) { + pci_read_bar(dev, + iov->iov_pos + PCIR_SRIOV_BAR(i), + &bar_value, &testval, &last_64); + + if (testval != 0) { + error = pci_iov_alloc_bar(dinfo, i, + pci_mapsize(testval)); + if (error != 0) + return (error); + } + } else + last_64 = 0; + } + + return (0); +} + +static void +pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config, + uint16_t first_rid, uint16_t rid_stride) +{ + char device_name[VF_MAX_NAME]; + const nvlist_t *device, *driver_config, *iov_config; + device_t bus, dev, vf; + struct pcicfg_iov *iov; + struct pci_devinfo *vfinfo; + size_t size; + int i, error; + uint16_t vid, did, next_rid; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + bus = device_get_parent(dev); + size = dinfo->cfg.devinfo_size; + next_rid = first_rid; + vid = pci_get_vendor(dev); + did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2); + + for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) { + snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i); + device = nvlist_get_nvlist(config, device_name); + iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME); + driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME); + + vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did); + if (vf == NULL) + break; + + /* + * If we are creating passthrough devices then force the ppt + * driver to attach to prevent a VF driver from claiming the + * VFs. 
+ */ + if (nvlist_get_bool(iov_config, "passthrough")) + device_set_devclass_fixed(vf, "ppt"); + + vfinfo = device_get_ivars(vf); + + vfinfo->cfg.iov = iov; + vfinfo->cfg.vf.index = i; + + pci_iov_add_bars(iov, vfinfo); + + error = PCI_IOV_ADD_VF(dev, i, driver_config); + if (error != 0) { + device_printf(dev, "Failed to add VF %d\n", i); + pci_delete_child(bus, vf); + } + } + + bus_generic_attach(bus); +} + +static int +pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) +{ + device_t bus, dev; + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + nvlist_t *config; + int i, error; + uint16_t rid_off, rid_stride; + uint16_t first_rid, last_rid; + uint16_t iov_ctl; + uint16_t num_vfs, total_vfs; + int iov_inited; + + mtx_lock(&Giant); + dinfo = cdev->si_drv1; + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + bus = device_get_parent(dev); + iov_inited = 0; + config = NULL; + + if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) { + mtx_unlock(&Giant); + return (EBUSY); + } + iov->iov_flags |= IOV_BUSY; + + error = pci_iov_parse_config(iov, arg, &config); + if (error != 0) + goto out; + + num_vfs = pci_iov_config_get_num_vfs(config); + total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2); + if (num_vfs > total_vfs) { + error = EINVAL; + goto out; + } + + error = pci_iov_config_page_size(dinfo); + if (error != 0) + goto out; + + error = pci_iov_set_ari(bus); + if (error != 0) + goto out; + + error = pci_iov_init(dev, num_vfs, config); + if (error != 0) + goto out; + iov_inited = 1; + + IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2); + + rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2); + rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2); + + first_rid = pci_get_rid(dev) + rid_off; + last_rid = first_rid + (num_vfs - 1) * rid_stride; + + /* We don't yet support allocating extra bus numbers for VFs. 
*/ + if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) { + error = ENOSPC; + goto out; + } + + iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); + iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE); + IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); + + error = pci_iov_init_rman(dev, iov); + if (error != 0) + goto out; + + iov->iov_num_vfs = num_vfs; + + error = pci_iov_setup_bars(dinfo); + if (error != 0) + goto out; + + iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); + iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE; + IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); + + /* Per specification, we must wait 100ms before accessing VFs. */ + pause("iov", howmany(hz, 10)); + pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride); + + nvlist_destroy(config); + iov->iov_flags &= ~IOV_BUSY; + mtx_unlock(&Giant); + + return (0); +out: + if (iov_inited) + PCI_IOV_UNINIT(dev); + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + if (iov->iov_bar[i].res != NULL) { + pci_release_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i), + iov->iov_bar[i].res); + pci_delete_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i)); + iov->iov_bar[i].res = NULL; + } + } + + if (iov->iov_flags & IOV_RMAN_INITED) { + rman_fini(&iov->rman); + iov->iov_flags &= ~IOV_RMAN_INITED; + } + + nvlist_destroy(config); + iov->iov_num_vfs = 0; + iov->iov_flags &= ~IOV_BUSY; + mtx_unlock(&Giant); + return (error); +} + +/* Return true if child is a VF of the given PF.
*/ +static int +pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child) +{ + struct pci_devinfo *vfinfo; + + vfinfo = device_get_ivars(child); + + if (!(vfinfo->cfg.flags & PCICFG_VF)) + return (0); + + return (pf == vfinfo->cfg.iov); +} + +static int +pci_iov_delete(struct cdev *cdev) +{ + device_t bus, dev, vf, *devlist; + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + int i, error, devcount; + uint32_t iov_ctl; + + mtx_lock(&Giant); + dinfo = cdev->si_drv1; + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + bus = device_get_parent(dev); + devlist = NULL; + + if (iov->iov_flags & IOV_BUSY) { + mtx_unlock(&Giant); + return (EBUSY); + } + + if (iov->iov_num_vfs == 0) { + mtx_unlock(&Giant); + return (ECHILD); + } + + iov->iov_flags |= IOV_BUSY; + + error = device_get_children(bus, &devlist, &devcount); + + if (error != 0) + goto out; + + for (i = 0; i < devcount; i++) { + vf = devlist[i]; + + if (!pci_iov_is_child_vf(iov, vf)) + continue; + + error = device_detach(vf); + if (error != 0) { + device_printf(dev, + "Could not disable SR-IOV: failed to detach VF %s\n", + device_get_nameunit(vf)); + goto out; + } + } + + for (i = 0; i < devcount; i++) { + vf = devlist[i]; + + if (pci_iov_is_child_vf(iov, vf)) + pci_delete_child(bus, vf); + } + PCI_IOV_UNINIT(dev); + + iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); + iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE); + IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); + IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2); + + iov->iov_num_vfs = 0; + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + if (iov->iov_bar[i].res != NULL) { + pci_release_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i), + iov->iov_bar[i].res); + pci_delete_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i)); + iov->iov_bar[i].res = NULL; + } + } + + if (iov->iov_flags & IOV_RMAN_INITED) { + rman_fini(&iov->rman); + iov->iov_flags &= ~IOV_RMAN_INITED; + } + + error = 0; +out: + free(devlist, M_TEMP); + iov->iov_flags 
&= ~IOV_BUSY; + mtx_unlock(&Giant); + return (error); +} + +static int +pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output) +{ + struct pci_devinfo *dinfo; + void *packed; + size_t output_len, size; + int error; + + packed = NULL; + + mtx_lock(&Giant); + dinfo = cdev->si_drv1; + packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size); + mtx_unlock(&Giant); + + if (packed == NULL) { + error = ENOMEM; + goto fail; + } + + output_len = output->len; + output->len = size; + if (size <= output_len) { + error = copyout(packed, output->schema, size); + + if (error != 0) + goto fail; + + output->error = 0; + } else + /* + * If we return an error then the ioctl code won't copyout + * output back to userland, so we flag the error in the struct + * instead. + */ + output->error = EMSGSIZE; + + error = 0; + +fail: + free(packed, M_NVLIST); + + return (error); +} + +static int +pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, + struct thread *td) +{ + + switch (cmd) { + case IOV_CONFIG: + return (pci_iov_config(dev, (struct pci_iov_arg *)data)); + case IOV_DELETE: + return (pci_iov_delete(dev)); + case IOV_GET_SCHEMA: + return (pci_iov_get_schema_ioctl(dev, + (struct pci_iov_schema *)data)); + default: + return (EINVAL); + } +} + +struct resource * +pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start, + u_long end, u_long count, u_int flags) +{ + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + struct pci_map *map; + struct resource *res; + struct resource_list_entry *rle; + u_long bar_start, bar_end; + pci_addr_t bar_length; + int error; + + dinfo = device_get_ivars(child); + iov = dinfo->cfg.iov; + + map = pci_find_bar(child, *rid); + if (map == NULL) + return (NULL); + + bar_length = (pci_addr_t)1 << map->pm_size; + bar_start = map->pm_value; + bar_end = bar_start + bar_length - 1; + + /* Make sure that the resource fits the constraints.
*/ + if (bar_start >= end || bar_end <= bar_start || count != 1) + return (NULL); + + /* Clamp the resource to the constraints if necessary. */ + if (bar_start < start) + bar_start = start; + if (bar_end > end) + bar_end = end; + bar_length = bar_end - bar_start + 1; + + res = rman_reserve_resource(&iov->rman, bar_start, bar_end, + bar_length, flags, child); + if (res == NULL) + return (NULL); + + rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid, + bar_start, bar_end, 1); + if (rle == NULL) { + rman_release_resource(res); + return (NULL); + } + + rman_set_rid(res, *rid); + + if (flags & RF_ACTIVE) { + error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res); + if (error != 0) { + resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, + *rid); + rman_release_resource(res); + return (NULL); + } + } + rle->res = res; + + return (res); +} + +int +pci_vf_release_mem_resource(device_t dev, device_t child, int rid, + struct resource *r) +{ + struct pci_devinfo *dinfo; + struct resource_list_entry *rle; + int error; + + dinfo = device_get_ivars(child); + + if (rman_get_flags(r) & RF_ACTIVE) { + error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r); + if (error != 0) + return (error); + } + + rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid); + if (rle != NULL) { + rle->res = NULL; + resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, + rid); + } + + return (rman_release_resource(r)); +} + Index: sys/dev/pci/pci_iov_if.m =================================================================== --- /dev/null +++ sys/dev/pci/pci_iov_if.m @@ -0,0 +1,52 @@ +#- +# Copyright (c) 2013-2015 Sandvine Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD: head/sys/dev/pci/pci_iov_if.m 283670 2015-05-28 22:01:50Z jhb $ +# + +#include + +INTERFACE pci_iov; + +HEADER { + struct nvlist; +} + + +METHOD int init { + device_t dev; + uint16_t num_vfs; + const struct nvlist *config; +}; + +METHOD void uninit { + device_t dev; +}; + +METHOD int add_vf { + device_t dev; + uint16_t vfnum; + const struct nvlist *config; +}; Index: sys/dev/pci/pci_iov_private.h =================================================================== --- /dev/null +++ sys/dev/pci/pci_iov_private.h @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/sys/dev/pci/pci_iov_private.h 279451 2015-03-01 00:40:42Z rstone $ + */ + +#ifndef _PCI_IOV_PRIVATE_H_ +#define _PCI_IOV_PRIVATE_H_ + +struct pci_iov_bar { + struct resource *res; + + pci_addr_t bar_size; + pci_addr_t bar_shift; +}; + +struct pcicfg_iov { + struct cdev *iov_cdev; + nvlist_t *iov_schema; + + struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1]; /* one slot per BAR index 0..PCIR_MAX_BAR_0 */ + struct rman rman; /* NOTE(review): presumably the rman VF memory is reserved from; confirm */ + char rman_name[64]; + + int iov_pos; + int iov_num_vfs; + uint32_t iov_flags; +}; + +#define IOV_RMAN_INITED 0x0001 /* NOTE(review): presumably an iov_flags bit, like IOV_BUSY; confirm */ +#define IOV_BUSY 0x0002 + +#endif /* _PCI_IOV_PRIVATE_H_ */ + Index: sys/dev/pci/pci_iov_schema.c =================================================================== --- /dev/null +++ sys/dev/pci/pci_iov_schema.c @@ -0,0 +1,869 @@ +/*- + * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: head/sys/dev/pci/pci_iov_schema.c 279465 2015-03-01 00:59:28Z rstone $"); + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/ctype.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/iov.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/queue.h> + +#include <machine/stdarg.h> + +#include <sys/dnv.h> +#include <sys/nv.h> +#include <sys/iov_schema.h> + +#include <net/ethernet.h> + +#include <dev/pci/schema_private.h> + +struct config_type_validator; +typedef int (validate_func)(const struct config_type_validator *, + const nvlist_t *, const char *name); +typedef int (default_validate_t)(const struct config_type_validator *, + const nvlist_t *); + +static validate_func pci_iov_schema_validate_bool; +static validate_func pci_iov_schema_validate_string; +static validate_func pci_iov_schema_validate_uint; +static validate_func pci_iov_schema_validate_unicast_mac; + +static default_validate_t pci_iov_validate_bool_default; +static default_validate_t pci_iov_validate_string_default; +static default_validate_t pci_iov_validate_uint_default; +static default_validate_t pci_iov_validate_unicast_mac_default; + +struct config_type_validator { + const char *type_name; /* schema "type" string this entry handles */ + validate_func *validate; /* checks a config value of this type */ + default_validate_t *default_validate; /* checks a schema default of this type */ + uintmax_t limit; /* largest accepted value; read by the uint validators */ +}; + +static struct config_type_validator pci_iov_schema_validators[] = { + { + .type_name = "bool", + .validate = pci_iov_schema_validate_bool, + .default_validate = pci_iov_validate_bool_default + }, + { + .type_name = "string", + .validate = pci_iov_schema_validate_string, + .default_validate = pci_iov_validate_string_default + }, + { + .type_name = "uint8_t", + .validate = pci_iov_schema_validate_uint, + .default_validate = pci_iov_validate_uint_default, + .limit = UINT8_MAX + }, + { + .type_name = "uint16_t", + .validate = pci_iov_schema_validate_uint, + .default_validate = pci_iov_validate_uint_default, + .limit = UINT16_MAX + }, + { + .type_name = "uint32_t", + .validate = pci_iov_schema_validate_uint, + .default_validate = pci_iov_validate_uint_default, + .limit = UINT32_MAX + }, + { + .type_name = "uint64_t", + .validate = pci_iov_schema_validate_uint, +
.default_validate = pci_iov_validate_uint_default, + .limit = UINT64_MAX + }, + { + .type_name = "unicast-mac", + .validate = pci_iov_schema_validate_unicast_mac, + .default_validate = pci_iov_validate_unicast_mac_default, + }, +}; + +static const struct config_type_validator * +pci_iov_schema_find_validator(const char *type) +{ /* Look up type in pci_iov_schema_validators[]; NULL when no entry has that name. */ + struct config_type_validator *validator; + int i; + + for (i = 0; i < nitems(pci_iov_schema_validators); i++) { + validator = &pci_iov_schema_validators[i]; + if (strcmp(type, validator->type_name) == 0) + return (validator); + } + + return (NULL); +} + +static void +pci_iov_schema_add_type(nvlist_t *entry, const char *type) +{ /* Record type in entry, or set EINVAL on entry when no validator exists for it. */ + + if (pci_iov_schema_find_validator(type) == NULL) { + nvlist_set_error(entry, EINVAL); + return; + } + nvlist_add_string(entry, "type", type); +} + +static void +pci_iov_schema_add_required(nvlist_t *entry, uint32_t flags) +{ /* Add "required" to entry when IOV_SCHEMA_REQUIRED is set in flags. */ + + if (flags & IOV_SCHEMA_REQUIRED) { + if (flags & IOV_SCHEMA_HASDEFAULT) { /* REQUIRED together with HASDEFAULT is rejected */ + nvlist_set_error(entry, EINVAL); + return; + } + + nvlist_add_bool(entry, "required", 1); + } +} + +void +pci_iov_schema_add_bool(nvlist_t *schema, const char *name, uint32_t flags, + int defaultVal) +{ /* Build a "bool" parameter entry and add it to schema under name. */ + nvlist_t *entry; + + entry = nvlist_create(NV_FLAG_IGNORE_CASE); + if (entry == NULL) { /* report the allocation failure on schema */ + nvlist_set_error(schema, ENOMEM); + return; + } + + pci_iov_schema_add_type(entry, "bool"); + if (flags & IOV_SCHEMA_HASDEFAULT) /* defaultVal is ignored without this flag */ + nvlist_add_bool(entry, "default", defaultVal); + pci_iov_schema_add_required(entry, flags); + + nvlist_move_nvlist(schema, name, entry); +} + +void +pci_iov_schema_add_string(nvlist_t *schema, const char *name, uint32_t flags, + const char *defaultVal) +{ /* Build a "string" parameter entry and add it to schema under name. */ + nvlist_t *entry; + + entry = nvlist_create(NV_FLAG_IGNORE_CASE); + if (entry == NULL) { /* report the allocation failure on schema */ + nvlist_set_error(schema, ENOMEM); + return; + } + + pci_iov_schema_add_type(entry, "string"); + if (flags & IOV_SCHEMA_HASDEFAULT) /* defaultVal is ignored without this flag */ + nvlist_add_string(entry, "default", defaultVal); + pci_iov_schema_add_required(entry, flags); + +
nvlist_move_nvlist(schema, name, entry); +} + +static void +pci_iov_schema_int(nvlist_t *schema, const char *name, const char *type, + uint32_t flags, uint64_t defaultVal) +{ /* Shared body of the pci_iov_schema_add_uintN() helpers; type is the schema type name to record. */ + nvlist_t *entry; + + entry = nvlist_create(NV_FLAG_IGNORE_CASE); + if (entry == NULL) { /* report the allocation failure on schema */ + nvlist_set_error(schema, ENOMEM); + return; + } + + pci_iov_schema_add_type(entry, type); + if (flags & IOV_SCHEMA_HASDEFAULT) /* defaultVal is ignored without this flag */ + nvlist_add_number(entry, "default", defaultVal); + pci_iov_schema_add_required(entry, flags); + + nvlist_move_nvlist(schema, name, entry); +} + +void +pci_iov_schema_add_uint8(nvlist_t *schema, const char *name, uint32_t flags, + uint8_t defaultVal) +{ /* Add a "uint8_t" parameter; the default is passed on as a uint64_t. */ + + pci_iov_schema_int(schema, name, "uint8_t", flags, defaultVal); +} + +void +pci_iov_schema_add_uint16(nvlist_t *schema, const char *name, uint32_t flags, + uint16_t defaultVal) +{ /* Add a "uint16_t" parameter. */ + + pci_iov_schema_int(schema, name, "uint16_t", flags, defaultVal); +} + +void +pci_iov_schema_add_uint32(nvlist_t *schema, const char *name, uint32_t flags, + uint32_t defaultVal) +{ /* Add a "uint32_t" parameter. */ + + pci_iov_schema_int(schema, name, "uint32_t", flags, defaultVal); +} + +void +pci_iov_schema_add_uint64(nvlist_t *schema, const char *name, uint32_t flags, + uint64_t defaultVal) +{ /* Add a "uint64_t" parameter. */ + + pci_iov_schema_int(schema, name, "uint64_t", flags, defaultVal); +} + +void +pci_iov_schema_add_unicast_mac(nvlist_t *schema, const char *name, + uint32_t flags, const uint8_t * defaultVal) +{ /* Build a "unicast-mac" parameter entry and add it to schema under name. */ + nvlist_t *entry; + + entry = nvlist_create(NV_FLAG_IGNORE_CASE); + if (entry == NULL) { /* report the allocation failure on schema */ + nvlist_set_error(schema, ENOMEM); + return; + } + + pci_iov_schema_add_type(entry, "unicast-mac"); + if (flags & IOV_SCHEMA_HASDEFAULT) /* the default is ETHER_ADDR_LEN bytes read from defaultVal */ + nvlist_add_binary(entry, "default", defaultVal, ETHER_ADDR_LEN); + pci_iov_schema_add_required(entry, flags); + + nvlist_move_nvlist(schema, name, entry); +} + +static int +pci_iov_schema_validate_bool(const struct config_type_validator * validator, + const nvlist_t *config, const char *name) +{ /* Returns 0 when config holds a bool called name, EINVAL otherwise. */ + + if (!nvlist_exists_bool(config, name)) + return (EINVAL); + return
(0); +} + +static int +pci_iov_schema_validate_string(const struct config_type_validator * validator, + const nvlist_t *config, const char *name) +{ /* Returns 0 when config holds a string called name, EINVAL otherwise. */ + + if (!nvlist_exists_string(config, name)) + return (EINVAL); + return (0); +} + +static int +pci_iov_schema_validate_uint(const struct config_type_validator * validator, + const nvlist_t *config, const char *name) +{ /* name must be a number in config that does not exceed validator->limit. */ + uint64_t value; + + if (!nvlist_exists_number(config, name)) + return (EINVAL); + + value = nvlist_get_number(config, name); + + if (value > validator->limit) /* too large for the declared uintN type */ + return (EINVAL); + + return (0); +} + +static int +pci_iov_schema_validate_unicast_mac( + const struct config_type_validator * validator, + const nvlist_t *config, const char *name) +{ /* name must be an ETHER_ADDR_LEN-byte binary value that is not a multicast address. */ + const uint8_t *mac; + size_t size; + + if (!nvlist_exists_binary(config, name)) + return (EINVAL); + + mac = nvlist_get_binary(config, name, &size); + + if (size != ETHER_ADDR_LEN) + return (EINVAL); + + if (ETHER_IS_MULTICAST(mac)) + return (EINVAL); + + return (0); +} + +static void +pci_iov_config_add_default(const nvlist_t *param_schema, const char *name, + nvlist_t *config) +{ /* Copy the "default" value of param_schema into config under name, using whichever nvlist type it was stored with. */ + const void *binary; + size_t len; + + if (nvlist_exists_binary(param_schema, "default")) { + binary = nvlist_get_binary(param_schema, "default", &len); + nvlist_add_binary(config, name, binary, len); + } else if (nvlist_exists_bool(param_schema, "default")) + nvlist_add_bool(config, name, + nvlist_get_bool(param_schema, "default")); + else if (nvlist_exists_number(param_schema, "default")) + nvlist_add_number(config, name, + nvlist_get_number(param_schema, "default")); + else if (nvlist_exists_nvlist(param_schema, "default")) + nvlist_add_nvlist(config, name, + nvlist_get_nvlist(param_schema, "default")); + else if (nvlist_exists_string(param_schema, "default")) + nvlist_add_string(config, name, + nvlist_get_string(param_schema, "default")); + else /* no "default" of any type handled above */ + panic("Unexpected nvlist type"); +} + +static int +pci_iov_validate_bool_default(const struct config_type_validator * validator, +
const nvlist_t *param) +{ /* The schema default must be stored as a bool. */ + + if (!nvlist_exists_bool(param, DEFAULT_SCHEMA_NAME)) + return (EINVAL); + return (0); +} + +static int +pci_iov_validate_string_default(const struct config_type_validator * validator, + const nvlist_t *param) +{ /* The schema default must be stored as a string. */ + + if (!nvlist_exists_string(param, DEFAULT_SCHEMA_NAME)) + return (EINVAL); + return (0); +} + +static int +pci_iov_validate_uint_default(const struct config_type_validator * validator, + const nvlist_t *param) +{ /* The schema default must be a number that does not exceed validator->limit. */ + uint64_t defaultVal; + + if (!nvlist_exists_number(param, DEFAULT_SCHEMA_NAME)) + return (EINVAL); + + defaultVal = nvlist_get_number(param, DEFAULT_SCHEMA_NAME); + if (defaultVal > validator->limit) + return (EINVAL); + return (0); +} + +static int +pci_iov_validate_unicast_mac_default( + const struct config_type_validator * validator, const nvlist_t *param) +{ /* The schema default must be an ETHER_ADDR_LEN-byte binary value that is not a multicast address. */ + const uint8_t *mac; + size_t size; + + if (!nvlist_exists_binary(param, DEFAULT_SCHEMA_NAME)) + return (EINVAL); + + mac = nvlist_get_binary(param, DEFAULT_SCHEMA_NAME, &size); + if (size != ETHER_ADDR_LEN) + return (EINVAL); + + if (ETHER_IS_MULTICAST(mac)) + return (EINVAL); + return (0); +} + +static int +pci_iov_validate_param_schema(const nvlist_t *schema) +{ /* Check one parameter's schema node: known type, well-typed default, bool "required" field. Returns 0 or an errno value. */ + const struct config_type_validator *validator; + const char *type; + int error; + + /* All parameters must define a type. */ + if (!nvlist_exists_string(schema, TYPE_SCHEMA_NAME)) + return (EINVAL); + type = nvlist_get_string(schema, TYPE_SCHEMA_NAME); + + validator = pci_iov_schema_find_validator(type); + if (validator == NULL) /* type string not in the validator table */ + return (EINVAL); + + /* Validate that the default value conforms to the type. */ + if (nvlist_exists(schema, DEFAULT_SCHEMA_NAME)) { + error = validator->default_validate(validator, schema); + if (error != 0) + return (error); + + /* Required and Default are mutually exclusive. */ + if (nvlist_exists(schema, REQUIRED_SCHEMA_NAME)) + return (EINVAL); + } + + /* The "Required" field must be a bool.
*/ + if (nvlist_exists(schema, REQUIRED_SCHEMA_NAME)) { + if (!nvlist_exists_bool(schema, REQUIRED_SCHEMA_NAME)) + return (EINVAL); + } + + return (0); +} + +static int +pci_iov_validate_subsystem_schema(const nvlist_t *dev_schema, const char *name) +{ /* dev_schema[name] must be an nvlist whose every member is a valid parameter schema nvlist. */ + const nvlist_t *sub_schema, *param_schema; + const char *param_name; + void *it; + int type, error; + + if (!nvlist_exists_nvlist(dev_schema, name)) + return (EINVAL); + sub_schema = nvlist_get_nvlist(dev_schema, name); + + it = NULL; + while ((param_name = nvlist_next(sub_schema, &type, &it)) != NULL) { + if (type != NV_TYPE_NVLIST) /* every parameter must itself be an nvlist */ + return (EINVAL); + param_schema = nvlist_get_nvlist(sub_schema, param_name); + + error = pci_iov_validate_param_schema(param_schema); + if (error != 0) + return (error); + } + + return (0); +} + +/* + * Validate that the driver schema does not define any configuration parameters + * whose names collide with configuration parameters defined in the iov schema. + */ +static int +pci_iov_validate_param_collisions(const nvlist_t *dev_schema) +{ + const nvlist_t *iov_schema, *driver_schema; + const char *name; + void *it; + int type; + + driver_schema = nvlist_get_nvlist(dev_schema, DRIVER_CONFIG_NAME); /* no existence check: pci_iov_validate_device_schema() validates both subsystems first */ + iov_schema = nvlist_get_nvlist(dev_schema, IOV_CONFIG_NAME); + + it = NULL; + while ((name = nvlist_next(driver_schema, &type, &it)) != NULL) { + if (nvlist_exists(iov_schema, name)) /* same name in both subsystems */ + return (EINVAL); + } + + return (0); +} + +/* + * Validate that we only have IOV and DRIVER subsystems beneath the given + * device schema node.
+ */ +static int +pci_iov_validate_schema_subsystems(const nvlist_t *dev_schema) +{ + const char *name; + void *it; + int type; + + it = NULL; + while ((name = nvlist_next(dev_schema, &type, &it)) != NULL) { + if (strcmp(name, IOV_CONFIG_NAME) != 0 && /* NOTE(review): case-sensitive, unlike the strcasecmp() checks applied to config nodes; confirm intended */ + strcmp(name, DRIVER_CONFIG_NAME) != 0) + return (EINVAL); + } + + return (0); +} + +static int +pci_iov_validate_device_schema(const nvlist_t *schema, const char *name) +{ /* Validate schema[name]: both subsystem nodes, no parameter name collisions, and no other children. */ + const nvlist_t *dev_schema; + int error; + + if (!nvlist_exists_nvlist(schema, name)) + return (EINVAL); + dev_schema = nvlist_get_nvlist(schema, name); + + error = pci_iov_validate_subsystem_schema(dev_schema, IOV_CONFIG_NAME); + if (error != 0) + return (error); + + error = pci_iov_validate_subsystem_schema(dev_schema, + DRIVER_CONFIG_NAME); + if (error != 0) + return (error); + + error = pci_iov_validate_param_collisions(dev_schema); /* relies on both subsystem checks above having passed */ + if (error != 0) + return (error); + + return (pci_iov_validate_schema_subsystems(dev_schema)); +} + +/* Validate that we only have PF and VF devices beneath the top-level schema. */ +static int +pci_iov_validate_schema_devices(const nvlist_t *dev_schema) +{ + const char *name; + void *it; + int type; + + it = NULL; + while ((name = nvlist_next(dev_schema, &type, &it)) != NULL) { + if (strcmp(name, PF_CONFIG_NAME) != 0 && + strcmp(name, VF_SCHEMA_NAME) != 0) + return (EINVAL); + } + + return (0); +} + +int +pci_iov_validate_schema(const nvlist_t *schema) +{ /* Validate a whole schema: the PF node, the VF node, and nothing else at the top level. Returns 0 or an errno value. */ + int error; + + error = pci_iov_validate_device_schema(schema, PF_CONFIG_NAME); + if (error != 0) + return (error); + + error = pci_iov_validate_device_schema(schema, VF_SCHEMA_NAME); + if (error != 0) + return (error); + + return (pci_iov_validate_schema_devices(schema)); +} + +/* + * Validate that all required parameters from the schema are specified in the + * config. If any parameter with a default value is not specified in the + * config, add it to config.
+ */ +static int +pci_iov_schema_validate_required(const nvlist_t *schema, nvlist_t *config) +{ + const nvlist_t *param_schema; + const char *name; + void *cookie; + int type; + + cookie = NULL; + while ((name = nvlist_next(schema, &type, &cookie)) != NULL) { + param_schema = nvlist_get_nvlist(schema, name); + + if (dnvlist_get_bool(param_schema, "required", 0)) { /* an absent "required" counts as false */ + if (!nvlist_exists(config, name)) + return (EINVAL); + } + + if (nvlist_exists(param_schema, "default") && + !nvlist_exists(config, name)) + pci_iov_config_add_default(param_schema, name, config); + } + + return (nvlist_error(config)); /* non-zero if config recorded an error, e.g. while defaults were added */ +} + +static int +pci_iov_schema_validate_param(const nvlist_t *schema_param, const char *name, + const nvlist_t *config) +{ /* Run the validator registered for schema_param's type on config[name]. */ + const struct config_type_validator *validator; + const char *type; + + type = nvlist_get_string(schema_param, "type"); + validator = pci_iov_schema_find_validator(type); + + KASSERT(validator != NULL, + ("Schema was not validated: Unknown type %s", type)); + + return (validator->validate(validator, config, name)); +} + +/* + * Validate that all parameters in config are defined in the schema. Also + * validate that the type of the parameter matches the type in the schema.
+ */ +static int +pci_iov_schema_validate_types(const nvlist_t *schema, const nvlist_t *config) +{ + const nvlist_t *schema_param; + void *cookie; + const char *name; + int type, error; + + cookie = NULL; + while ((name = nvlist_next(config, &type, &cookie)) != NULL) { + if (!nvlist_exists_nvlist(schema, name)) /* config key with no schema entry */ + return (EINVAL); + + schema_param = nvlist_get_nvlist(schema, name); + + error = pci_iov_schema_validate_param(schema_param, name, + config); + + if (error != 0) + return (error); + } + + return (0); +} + +static int +pci_iov_schema_validate_device(const nvlist_t *schema, nvlist_t *config, + const char *schema_device, const char *config_device) +{ /* Validate config[config_device] against schema[schema_device]. The device node and its two subsystem nodes are taken out of config for the checks and moved back at the out label. */ + const nvlist_t *device_schema, *iov_schema, *driver_schema; + nvlist_t *device_config, *iov_config, *driver_config; + int error; + + device_config = NULL; + iov_config = NULL; + driver_config = NULL; + + device_schema = nvlist_get_nvlist(schema, schema_device); + iov_schema = nvlist_get_nvlist(device_schema, IOV_CONFIG_NAME); + driver_schema = nvlist_get_nvlist(device_schema, DRIVER_CONFIG_NAME); + + device_config = dnvlist_take_nvlist(config, config_device, NULL); + if (device_config == NULL) { /* device node missing from config */ + error = EINVAL; + goto out; + } + + iov_config = dnvlist_take_nvlist(device_config, IOV_CONFIG_NAME, NULL); + if (iov_config == NULL) { + error = EINVAL; + goto out; + } + + driver_config = dnvlist_take_nvlist(device_config, DRIVER_CONFIG_NAME, + NULL); + if (driver_config == NULL) { + error = EINVAL; + goto out; + } + + error = pci_iov_schema_validate_required(iov_schema, iov_config); /* may add defaults to iov_config */ + if (error != 0) + goto out; + + error = pci_iov_schema_validate_required(driver_schema, driver_config); /* may add defaults to driver_config */ + if (error != 0) + goto out; + + error = pci_iov_schema_validate_types(iov_schema, iov_config); + if (error != 0) + goto out; + + error = pci_iov_schema_validate_types(driver_schema, driver_config); + if (error != 0) + goto out; + +out: + /* Note that these functions handle NULL pointers safely.
*/ + nvlist_move_nvlist(device_config, IOV_CONFIG_NAME, iov_config); + nvlist_move_nvlist(device_config, DRIVER_CONFIG_NAME, driver_config); + nvlist_move_nvlist(config, config_device, device_config); + + return (error); +} + +static int +pci_iov_schema_validate_vfs(const nvlist_t *schema, nvlist_t *config, + uint16_t num_vfs) +{ /* Validate the config node of each of the num_vfs VFs against the VF schema, stopping at the first error. */ + char device[VF_MAX_NAME]; + int i, error; + + for (i = 0; i < num_vfs; i++) { + snprintf(device, sizeof(device), VF_PREFIX"%d", i); /* node name is VF_PREFIX followed by the VF number */ + + error = pci_iov_schema_validate_device(schema, config, + VF_SCHEMA_NAME, device); + if (error != 0) + return (error); + } + + return (0); +} + +/* + * Validate that the device node only has IOV and DRIVER subnodes. + */ +static int +pci_iov_schema_validate_device_subsystems(const nvlist_t *config) +{ + void *cookie; + const char *name; + int type; + + cookie = NULL; + while ((name = nvlist_next(config, &type, &cookie)) != NULL) { + if (strcasecmp(name, IOV_CONFIG_NAME) == 0) + continue; + else if (strcasecmp(name, DRIVER_CONFIG_NAME) == 0) + continue; + + return (EINVAL); /* any other subnode name is rejected */ + } + + return (0); +} + +/* + * Validate that the string is a valid device node name. It must either be "PF" + * or "VF-n", where n is an integer in the range [0, num_vfs).
+ */ +static int +pci_iov_schema_validate_dev_name(const char *name, uint16_t num_vfs) +{ + const char *number_start; + char *endp; + u_long vf_num; + + if (strcasecmp(PF_CONFIG_NAME, name) == 0) + return (0); + + /* Ensure that we start with "VF-" */ + if (strncasecmp(name, VF_PREFIX, VF_PREFIX_LEN) != 0) + return (EINVAL); + + number_start = name + VF_PREFIX_LEN; + + /* Filter out name == "VF-" (no number) */ + if (number_start[0] == '\0') + return (EINVAL); + + /* Disallow leading whitespace or +/- */ + if (!isdigit(number_start[0])) + return (EINVAL); + + vf_num = strtoul(number_start, &endp, 10); + if (*endp != '\0') /* characters left over after the number */ + return (EINVAL); + + /* Disallow leading zeros on VF-[1-9][0-9]* */ + if (vf_num != 0 && number_start[0] == '0') + return (EINVAL); + + /* Disallow leading zeros on VF-0 */ + if (vf_num == 0 && number_start[1] != '\0') + return (EINVAL); + + if (vf_num >= num_vfs) /* VF number outside [0, num_vfs) */ + return (EINVAL); + + return (0); +} + +/* + * Validate that there are no device nodes in config other than the ones for + * the PF and the VFs. This includes validating that all config nodes of the + * form VF-n specify a VF number that is < num_vfs. + */ +static int +pci_iov_schema_validate_device_names(const nvlist_t *config, uint16_t num_vfs) +{ + const nvlist_t *device; + void *cookie; + const char *name; + int type, error; + + cookie = NULL; + while ((name = nvlist_next(config, &type, &cookie)) != NULL) { + error = pci_iov_schema_validate_dev_name(name, num_vfs); + if (error != 0) + return (error); + + /* + * Note that as this is a valid PF/VF node, we know that + * pci_iov_schema_validate_device() has already checked that + * the PF/VF node is an nvlist.
+ */ + device = nvlist_get_nvlist(config, name); + error = pci_iov_schema_validate_device_subsystems(device); + if (error != 0) + return (error); + } + + return (0); +} + +int +pci_iov_schema_validate_config(const nvlist_t *schema, nvlist_t *config) +{ /* Validate config against schema: the PF node first, then every VF node, then reject unexpected nodes. Missing parameters that have schema defaults are added to config. Returns 0 or an errno value. */ + int error; + uint16_t num_vfs; + + error = pci_iov_schema_validate_device(schema, config, PF_CONFIG_NAME, + PF_CONFIG_NAME); + if (error != 0) + return (error); + + num_vfs = pci_iov_config_get_num_vfs(config); /* read only after the PF node passed validation */ + + error = pci_iov_schema_validate_vfs(schema, config, num_vfs); + if (error != 0) + return (error); + + return (pci_iov_schema_validate_device_names(config, num_vfs)); +} + +/* + * Return value of the num_vfs parameter. config must have already been + * validated, which guarantees that the parameter exists. + */ +uint16_t +pci_iov_config_get_num_vfs(const nvlist_t *config) +{ + const nvlist_t *pf, *iov; + + pf = nvlist_get_nvlist(config, PF_CONFIG_NAME); + iov = nvlist_get_nvlist(pf, IOV_CONFIG_NAME); + return (nvlist_get_number(iov, "num_vfs")); /* the number is narrowed to uint16_t by the return type */ +} + +/* Allocate a new empty schema node.
*/ +nvlist_t * +pci_iov_schema_alloc_node(void) +{ + + return (nvlist_create(NV_FLAG_IGNORE_CASE)); /* same flag as the per-parameter entries created in this file */ +} Index: sys/dev/pci/pci_pci.c =================================================================== --- sys/dev/pci/pci_pci.c +++ sys/dev/pci/pci_pci.c @@ -57,13 +57,16 @@ static int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate); static uint16_t pcib_ari_get_rid(device_t pcib, device_t dev); -static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, +static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width); static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width); static int pcib_ari_maxslots(device_t dev); static int pcib_ari_maxfuncs(device_t dev); static int pcib_try_enable_ari(device_t pcib, device_t dev); +static int pcib_ari_enabled(device_t pcib); +static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, + int *bus, int *slot, int *func); static device_method_t pcib_methods[] = { /* Device interface */ @@ -104,6 +107,8 @@ DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep), DEVMETHOD(pcib_get_rid, pcib_ari_get_rid), DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari), + DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled), + DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid), DEVMETHOD_END }; @@ -1867,6 +1872,24 @@ return (PCI_FUNCMAX); } +static void +pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, + int *func) +{ /* Split rid into bus, slot and function, using the ARI layout when this bridge has PCIB_ENABLE_ARI set. */ + struct pcib_softc *sc; + + sc = device_get_softc(pcib); + + *bus = PCI_RID2BUS(rid); /* the bus is decoded the same way in both layouts */ + if (sc->flags & PCIB_ENABLE_ARI) { + *slot = PCIE_ARI_RID2SLOT(rid); + *func = PCIE_ARI_RID2FUNC(rid); + } else { + *slot = PCI_RID2SLOT(rid); + *func = PCI_RID2FUNC(rid); + } +} + /* * Since we are a child of a PCI bus, its parent must support the pcib interface.
*/ @@ -1998,6 +2021,16 @@ return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate)); } +static int +pcib_ari_enabled(device_t pcib) +{ /* Returns 1 when this bridge's softc has PCIB_ENABLE_ARI set, 0 otherwise. */ + struct pcib_softc *sc; + + sc = device_get_softc(pcib); + + return ((sc->flags & PCIB_ENABLE_ARI) != 0); +} + static uint16_t pcib_ari_get_rid(device_t pcib, device_t dev) { Index: sys/dev/pci/pci_private.h =================================================================== --- sys/dev/pci/pci_private.h +++ sys/dev/pci/pci_private.h @@ -51,6 +51,8 @@ void pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size); void pci_add_child(device_t bus, struct pci_devinfo *dinfo); +device_t pci_add_iov_child(device_t bus, device_t pf, size_t dinfo_size, + uint16_t rid, uint16_t vid, uint16_t did); void pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask); int pci_attach_common(device_t dev); @@ -133,4 +135,26 @@ */ void pci_cfg_save(device_t, struct pci_devinfo *, int); +int pci_mapsize(uint64_t testval); +void pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, + pci_addr_t *testvalp, int *bar64); +struct pci_map *pci_add_bar(device_t dev, int reg, pci_addr_t value, + pci_addr_t size); + +struct resource *pci_alloc_multi_resource(device_t dev, device_t child, + int type, int *rid, u_long start, u_long end, u_long count, + u_long num, u_int flags); + +int pci_iov_attach_method(device_t bus, device_t dev, + struct nvlist *pf_schema, struct nvlist *vf_schema); +int pci_iov_detach_method(device_t bus, device_t dev); + +device_t pci_create_iov_child_method(device_t bus, device_t pf, + uint16_t rid, uint16_t vid, uint16_t did); + +struct resource *pci_vf_alloc_mem_resource(device_t dev, device_t child, + int *rid, u_long start, u_long end, u_long count, + u_int flags); +int pci_vf_release_mem_resource(device_t dev, device_t child, + int rid, struct resource *r); #endif /* _PCI_PRIVATE_H_ */ Index: sys/dev/pci/pci_user.c =================================================================== ---
sys/dev/pci/pci_user.c +++ sys/dev/pci/pci_user.c @@ -492,7 +492,7 @@ static int pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { - device_t pcidev, brdev; + device_t pcidev; void *confdata; const char *name; struct devlist *devlist_head; @@ -922,37 +922,25 @@ io->pi_sel.pc_bus, io->pi_sel.pc_dev, io->pi_sel.pc_func); if (pcidev) { - brdev = device_get_parent( - device_get_parent(pcidev)); - #ifdef PRE7_COMPAT if (cmd == PCIOCWRITE || cmd == PCIOCWRITE_OLD) #else if (cmd == PCIOCWRITE) #endif - PCIB_WRITE_CONFIG(brdev, - io->pi_sel.pc_bus, - io->pi_sel.pc_dev, - io->pi_sel.pc_func, + pci_write_config(pcidev, io->pi_reg, io->pi_data, io->pi_width); #ifdef PRE7_COMPAT else if (cmd == PCIOCREAD_OLD) io_old->pi_data = - PCIB_READ_CONFIG(brdev, - io->pi_sel.pc_bus, - io->pi_sel.pc_dev, - io->pi_sel.pc_func, + pci_read_config(pcidev, io->pi_reg, io->pi_width); #endif else io->pi_data = - PCIB_READ_CONFIG(brdev, - io->pi_sel.pc_bus, - io->pi_sel.pc_dev, - io->pi_sel.pc_func, + pci_read_config(pcidev, io->pi_reg, io->pi_width); error = 0; Index: sys/dev/pci/pcib_if.m =================================================================== --- sys/dev/pci/pcib_if.m +++ sys/dev/pci/pcib_if.m @@ -23,7 +23,7 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# $FreeBSD$ +# $FreeBSD: head/sys/dev/pci/pcib_if.m 289494 2015-10-18 08:13:51Z jmg $ # #include <sys/bus.h> @@ -39,6 +39,13 @@ { return (PCI_INVALID_IRQ); } + + static int + pcib_null_ari_enabled(device_t pcib) + { /* Fallback that always returns 0, i.e. reports ARI as not enabled. */ + + return (0); + } }; # @@ -90,7 +97,7 @@ }; # -# Route an interrupt. Returns a value suitable for stuffing into +# Route an interrupt. Returns a value suitable for stuffing into # a device's interrupt register.
# METHOD int route_interrupt { @@ -182,3 +189,20 @@ device_t dev; }; +# +# Return non-zero if PCI ARI is enabled, or zero otherwise +# +METHOD int ari_enabled { + device_t pcib; +} DEFAULT pcib_null_ari_enabled; + +# +# Decode a PCI Routing Identifier (RID) into PCI bus/slot/function +# +METHOD void decode_rid { + device_t pcib; + uint16_t rid; + int *bus; + int *slot; + int *func; +} DEFAULT pcib_decode_rid; Index: sys/dev/pci/pcib_private.h =================================================================== --- sys/dev/pci/pcib_private.h +++ sys/dev/pci/pcib_private.h @@ -170,5 +170,7 @@ int pcib_release_msix(device_t pcib, device_t dev, int irq); int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data); uint16_t pcib_get_rid(device_t pcib, device_t dev); +void pcib_decode_rid(device_t pcib, uint16_t rid, int *bus, + int *slot, int *func); #endif Index: sys/dev/pci/pcib_support.c =================================================================== --- sys/dev/pci/pcib_support.c +++ sys/dev/pci/pcib_support.c @@ -66,3 +66,13 @@ return (PCI_RID(bus, slot, func)); } +void +pcib_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, + int *func) +{ /* Split rid into bus, slot and function with the non-ARI macros; pcib is not used. */ + + *bus = PCI_RID2BUS(rid); + *slot = PCI_RID2SLOT(rid); + *func = PCI_RID2FUNC(rid); +} + Index: sys/dev/pci/pcireg.h =================================================================== --- sys/dev/pci/pcireg.h +++ sys/dev/pci/pcireg.h @@ -68,6 +68,10 @@ #define PCI_RID2SLOT(rid) (((rid) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX) #define PCI_RID2FUNC(rid) (((rid) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX) +#define PCIE_ARI_RID2SLOT(rid) (0) /* an ARI RID always decodes to slot 0 */ +#define PCIE_ARI_RID2FUNC(rid) \ + (((rid) >> PCI_RID_FUNC_SHIFT) & PCIE_ARI_FUNCMAX) + #define PCIE_ARI_SLOT(func) (((func) >> PCI_RID_SLOT_SHIFT) & PCI_SLOTMAX) #define PCIE_ARI_FUNC(func) (((func) >> PCI_RID_FUNC_SHIFT) & PCI_FUNCMAX) @@ -920,3 +924,21 @@ #define PCIR_SERIAL_LOW 0x04 #define PCIR_SERIAL_HIGH 0x08 +/* SR-IOV definitions */ +#define
PCIR_SRIOV_CTL 0x08 +#define PCIM_SRIOV_VF_EN 0x01 +#define PCIM_SRIOV_VF_MSE 0x08 /* Memory space enable. */ +#define PCIM_SRIOV_ARI_EN 0x10 +#define PCIR_SRIOV_TOTAL_VFS 0x0E +#define PCIR_SRIOV_NUM_VFS 0x10 +#define PCIR_SRIOV_VF_OFF 0x14 +#define PCIR_SRIOV_VF_STRIDE 0x16 +#define PCIR_SRIOV_VF_DID 0x1A +#define PCIR_SRIOV_PAGE_CAP 0x1C +#define PCIR_SRIOV_PAGE_SIZE 0x20 + +#define PCI_SRIOV_BASE_PAGE_SHIFT 12 /* 1 << 12 is 4096 */ + +#define PCIR_SRIOV_BARS 0x24 +#define PCIR_SRIOV_BAR(x) (PCIR_SRIOV_BARS + (x) * 4) /* BAR x lies 4 bytes per index past PCIR_SRIOV_BARS */ + Index: sys/dev/pci/pcivar.h =================================================================== --- sys/dev/pci/pcivar.h +++ sys/dev/pci/pcivar.h @@ -143,6 +143,12 @@ uint8_t pcix_location; /* Offset of PCI-X capability registers. */ }; +struct pcicfg_vf { + int index; /* NOTE(review): presumably this VF's number within its PF; confirm */ +}; + +#define PCICFG_VF 0x0001 /* Device is an SR-IOV Virtual Function */ + /* config header information common to all header types */ typedef struct pcicfg { struct device *dev; /* device which owns this */ @@ -179,6 +185,9 @@ uint8_t slot; /* config space slot address */ uint8_t func; /* config space function number */ + uint32_t flags; /* flags defined above */ + size_t devinfo_size; /* Size of devinfo for this bus type. */ + struct pcicfg_pp pp; /* Power management */ struct pcicfg_vpd vpd; /* Vital product data */ struct pcicfg_msi msi; /* PCI MSI */ @@ -186,6 +195,8 @@ struct pcicfg_ht ht; /* HyperTransport */ struct pcicfg_pcie pcie; /* PCI Express */ struct pcicfg_pcix pcix; /* PCI-X */ + struct pcicfg_iov *iov; /* SR-IOV */ + struct pcicfg_vf vf; /* SR-IOV Virtual Function */ } pcicfgregs; /* additional type 1 device config header information (PCI to PCI bridge) */ Index: sys/dev/pci/schema_private.h =================================================================== --- /dev/null +++ sys/dev/pci/schema_private.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2014 Sandvine Inc. All rights reserved. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _SCHEMA_PRIVATE_H_ +#define _SCHEMA_PRIVATE_H_ + +int pci_iov_validate_schema(const nvlist_t *schema); + +int pci_iov_schema_validate_config(const nvlist_t *, nvlist_t *); +uint16_t pci_iov_config_get_num_vfs(const nvlist_t *); + +#endif Index: sys/i386/conf/GENERIC =================================================================== --- sys/i386/conf/GENERIC +++ sys/i386/conf/GENERIC @@ -91,6 +91,7 @@ device acpi device eisa device pci +device PCI_IOV # PCI SR-IOV support # Floppy drives device fdc Index: sys/kern/subr_bus.c =================================================================== --- sys/kern/subr_bus.c +++ sys/kern/subr_bus.c @@ -2679,6 +2679,25 @@ } /** + * @brief Set the devclass of a device and mark the devclass fixed. + * @see device_set_devclass() + */ +int +device_set_devclass_fixed(device_t dev, const char *classname) +{ + int error; + + if (classname == NULL) + return (EINVAL); + + error = device_set_devclass(dev, classname); + if (error) + return (error); + dev->flags |= DF_FIXEDCLASS; + return (0); +} + +/** * @brief Set the driver of a device * * @retval 0 success Index: sys/sys/bus.h =================================================================== --- sys/sys/bus.h +++ sys/sys/bus.h @@ -465,6 +465,7 @@ void device_set_desc(device_t dev, const char* desc); void device_set_desc_copy(device_t dev, const char* desc); int device_set_devclass(device_t dev, const char *classname); +int device_set_devclass_fixed(device_t dev, const char *classname); int device_set_driver(device_t dev, driver_t *driver); void device_set_flags(device_t dev, u_int32_t flags); void device_set_softc(device_t dev, void *softc); Index: sys/sys/iov.h =================================================================== --- /dev/null +++ sys/sys/iov.h @@ -0,0 +1,257 @@ +/*- + * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/sys/sys/iov.h 279453 2015-03-01 00:40:57Z rstone $ + */ + +#ifndef _SYS_IOV_H_ +#define _SYS_IOV_H_ + +#include + +#define PF_CONFIG_NAME "PF" +#define VF_SCHEMA_NAME "VF" + +#define VF_PREFIX "VF-" +#define VF_PREFIX_LEN 3 +#define VF_NUM_LEN 5 /* The maximum VF num is 65535. 
*/ +#define VF_MAX_NAME (VF_PREFIX_LEN + VF_NUM_LEN + 1) + +#define DRIVER_CONFIG_NAME "DRIVER" +#define IOV_CONFIG_NAME "IOV" + +#define TYPE_SCHEMA_NAME "TYPE" +#define DEFAULT_SCHEMA_NAME "DEFAULT" +#define REQUIRED_SCHEMA_NAME "REQUIRED" + +/* + * Because each PF device is expected to expose a unique set of possible + * configurations, the SR-IOV infrastructure dynamically queries the PF + * driver for its capabilities. These capabilities are exposed to userland + * with a configuration schema. The schema is exported from the kernel as a + * packed nvlist. See nv(3) for the details of the nvlist API. The expected + * format of the nvlist is: + * + * BASIC RULES + * 1) All keys are case-insensitive. + * 2) No keys that are not specified below may exist at any level of the + * schema. + * 3) All keys are mandatory unless explicitly documented as optional. If a + * key is mandatory then the associated value is also mandatory. + * 4) Order of keys is irrelevant. + * + * TOP LEVEL + * 1) There must be a top-level key with the name PF_CONFIG_NAME. The value + * associated with this key is a nvlist that follows the device schema + * node format. The parameters in this node specify the configuration + * parameters that may be applied to a PF. + * 2) There must be a top-level key with the name VF_SCHEMA_NAME. The value + * associated with this key is a nvlist that follows the device schema + * node format. The parameters in this node specify the configuration + * parameters that may be applied to a VF. + * + * DEVICE SCHEMA NODE + * 1) There must be a key with the name DRIVER_CONFIG_NAME. The value + * associated with this key is a nvlist that follows the device/subsystem + * schema node format. The parameters in this node specify the + * configuration parameters that are specific to a particular device + * driver. + * 2) There must be a key with the name IOV_CONFIG_NAME. 
The value associated + * with this key is an nvlist that follows the device/subsystem schema node + * format. The parameters in this node specify the configuration + * parameters that are applied by the SR-IOV infrastructure. + * + * DEVICE/SUBSYSTEM SCHEMA NODE + * 1) All keys in the device/subsystem schema node are optional. + * 2) Each key specifies the name of a valid configuration parameter that may + * be applied to the device/subsystem combination specified by this node. + * The value associated with the key specifies the format of valid + * configuration values, and must be a nvlist in parameter schema node + * format. + * + * PARAMETER SCHEMA NODE + * 1) The parameter schema node must contain a key with the name + * TYPE_SCHEMA_NAME. The value associated with this key must be a string. + * This string specifies the type of value that the parameter specified by + * this node must take. The string must have one of the following values: + * - "bool" - The configuration value must be a boolean. + * - "mac-addr" - The configuration value must be a binary value. In + * addition, the value must be exactly 6 bytes long and + * the value must not be a multicast or broadcast mac. + * - "uint8_t" - The configuration value must be an integer value in + * the range [0, UINT8_MAX]. + * - "uint16_t" - The configuration value must be an integer value in + * the range [0, UINT16_MAX]. + * - "uint32_t" - The configuration value must be an integer value in + * the range [0, UINT32_MAX]. + * - "uint64_t" - The configuration value must be an integer value in + * the range [0, UINT64_MAX]. + * 2) The parameter schema may contain a key with the name + * REQUIRED_SCHEMA_NAME. This key is optional. If this key is present, the + * value associated with it must have a boolean type. If the value is true, + * then the parameter specified by this schema is a required parameter. All + * valid configurations must include all required parameters. 
+ * 3) The parameter schema may contain a key with the name DEFAULT_SCHEMA_NAME. + * This key is optional. This key must not be present if the parameter + * specified by this schema is required. If this key is present, the value + * associated with the parent key must follow all restrictions specified by + * the type specified by this schema. If a configuration does not supply a + * value for the parameter specified by this schema, then the kernel will + * apply the value associated with this key in its place. + * + * The following is an example of a valid schema, as printed by nvlist_dump. + * Keys are printed followed by the type of the value in parentheses. The + * value is displayed following a colon. The indentation level reflects the + * level of nesting of nvlists. String values are displayed between [] + * brackets. Binary values are shown with the length of the binary value (in + * bytes) followed by the actual binary values. + * + * PF (NVLIST): + * IOV (NVLIST): + * num_vfs (NVLIST): + * type (STRING): [uint16_t] + * required (BOOL): TRUE + * device (NVLIST): + * type (STRING): [string] + * required (BOOL): TRUE + * DRIVER (NVLIST): + * VF (NVLIST): + * IOV (NVLIST): + * passthrough (NVLIST): + * type (STRING): [bool] + * default (BOOL): FALSE + * DRIVER (NVLIST): + * mac-addr (NVLIST): + * type (STRING): [mac-addr] + * default (BINARY): 6 000000000000 + * vlan (NVLIST): + * type (STRING): [uint16_t] + * spoof-check (NVLIST): + * type (STRING): [bool] + * default (BOOL): TRUE + * allow-set-mac (NVLIST): + * type (STRING): [bool] + * default (BOOL): FALSE + */ +struct pci_iov_schema +{ + void *schema; + size_t len; + int error; +}; + +/* + * SR-IOV configuration is passed to the kernel as a packed nvlist. See nv(3) + * for the details of the nvlist API. The expected format of the nvlist is: + * + * BASIC RULES + * 1) All keys are case-insensitive. + * 2) No keys that are not specified below may exist at any level of the + * config nvlist. 
+ * 3) Unless otherwise specified, all keys are optional. It should go without + * saying a key being mandatory is transitive: that is, if a key is + * specified to contain a sub-node that contains a mandatory key, then + * the outer key is implicitly mandatory. If a key is mandatory then the + * associated value is also mandatory. + * 4) Order of keys is irrelevant. + * + * TOP LEVEL OF CONFIG NVLIST + * 1) All keys specified in this section are mandatory. + * 2) There must be a top-level key with the name PF_CONFIG_NAME. The value + * associated is an nvlist that follows the "device node" format. The + * parameters in this node specify parameters that apply to the PF. + * 3) For every VF being configured (this is set via the "num_vfs" parameter + * in the PF section), there must be a top-level key whose name is VF_PREFIX + * immediately followed by the index of the VF as a decimal integer. For + * example, this would be VF-0 for the first VF. VFs are numbered starting + * from 0. The value associated with this key follows the "device node" + * format. The parameters in this node specify configuration that applies + * to the VF specified in the key. Leading zeros are not permitted in VF + * index. Configuration for the second VF must be specified in a node with + * the key VF-1. VF-01 is not a valid key. + * + * DEVICE NODES + * 1) All keys specified in this section are mandatory. + * 2) The device node must contain a key with the name DRIVER_CONFIG_NAME. The + * value associated with this key is an nvlist following the subsystem node + * format. The parameters in this key specify configuration that is specific + * to a particular device driver. + * 3) The device node must contain a key with the name IOV_CONFIG_NAME. The + * value associated with this key is an nvlist following the subsystem node + * format. The parameters in this key specify configuration that is consumed + * by the SR-IOV infrastructure. 
+ * + * SUBSYSTEM NODES + * 1) A subsystem node specifies configuration parameters that apply to a + * particular subsystem (driver or infrastructure) of a particular device + * (PF or individual VF). + * Note: We will refer to the section of the configuration schema that + * specifies the parameters for this subsystem and device + * configuration as the device/subsystem schema. + * 2) The subsystem node must contain only keys that correspond to parameters + * that are specified in the device/subsystem schema. + * 3) Every parameter specified as required in the device/subsystem schema is + * a mandatory key in the subsystem node. + * Note: All parameters that are not required in device/subsystem schema are + * optional keys. In particular, any parameter specified to have a + * default value in the device/subsystem schema is optional. The + * kernel is responsible for applying default values. + * 4) The value of every parameter in the device node must conform to the + * restrictions of the type specified for that parameter in the device/ + * subsystem schema. + * + * The following is an example of a valid configuration, when validated against + * the schema example given above. 
+ * + * PF (NVLIST): + * driver (NVLIST): + * iov (NVLIST): + * num_vfs (NUMBER): 3 (3) (0x3) + * device (STRING): [ix0] + * VF-0 (NVLIST): + * driver (NVLIST): + * vlan (NUMBER): 1000 (1000) (0x3e8) + * iov (NVLIST): + * passthrough (BOOL): TRUE + * VF-1 (NVLIST): + * driver (NVLIST): + * iov (NVLIST): + * VF-2 (NVLIST): + * driver (NVLIST): + * mac-addr (BINARY): 6 020102030405 + * iov (NVLIST): + */ +struct pci_iov_arg +{ + void *config; + size_t len; +}; + +#define IOV_CONFIG _IOW('p', 10, struct pci_iov_arg) +#define IOV_DELETE _IO('p', 11) +#define IOV_GET_SCHEMA _IOWR('p', 12, struct pci_iov_schema) + +#endif + Index: sys/sys/iov_schema.h =================================================================== --- /dev/null +++ sys/sys/iov_schema.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/sys/sys/iov_schema.h 279451 2015-03-01 00:40:42Z rstone $ + */ + +#ifndef _SYS_IOV_SCHEMA_H_ +#define _SYS_IOV_SCHEMA_H_ + +#define IOV_SCHEMA_HASDEFAULT (1 << 0) +#define IOV_SCHEMA_REQUIRED (1 << 1) + +nvlist_t *pci_iov_schema_alloc_node(void); + +void pci_iov_schema_add_bool(nvlist_t *schema, const char *name, + uint32_t flags, int defaultVal); +void pci_iov_schema_add_string(nvlist_t *schema, const char *name, + uint32_t flags, const char *defaultVal); +void pci_iov_schema_add_uint8(nvlist_t *schema, const char *name, + uint32_t flags, uint8_t defaultVal); +void pci_iov_schema_add_uint16(nvlist_t *schema, const char *name, + uint32_t flags, uint16_t defaultVal); +void pci_iov_schema_add_uint32(nvlist_t *schema, const char *name, + uint32_t flags, uint32_t defaultVal); +void pci_iov_schema_add_uint64(nvlist_t *schema, const char *name, + uint32_t flags, uint64_t defaultVal); +void pci_iov_schema_add_unicast_mac(nvlist_t *schema, const char *name, + uint32_t flags, const uint8_t * defaultVal); + +#endif Index: usr.sbin/Makefile =================================================================== --- usr.sbin/Makefile +++ usr.sbin/Makefile @@ -35,6 +35,7 @@ i2c \ ifmcstat \ iostat \ + iovctl \ kldxref \ mailwrapper \ makefs \ Index: usr.sbin/iovctl/Makefile =================================================================== --- /dev/null +++ usr.sbin/iovctl/Makefile @@ -0,0 +1,20 @@ +# $FreeBSD$ + +PROG= iovctl +SRCS= 
iovctl.c parse.c validate.c + +DPADD= ${LIBNV} ${LIBUCL} ${LIBM} +LDADD= -lnv -lucl -lm +USEPRIVATELIB= ucl + +CFLAGS+=-I${.CURDIR}/../../contrib/libucl/include + +WARNS?=6 + +MAN= \ + iovctl.8 \ + iovctl.conf.5 \ + +.include +.include + Index: usr.sbin/iovctl/iovctl.h =================================================================== --- /dev/null +++ usr.sbin/iovctl/iovctl.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: head/usr.sbin/iovctl/iovctl.h 279461 2015-03-01 00:52:41Z rstone $ + */ + +#ifndef IOVCTL_H +#define IOVCTL_H + +char * find_device(const char *); +nvlist_t * parse_config_file(const char *, const nvlist_t *); +void validate_config(nvlist_t *, const nvlist_t *, const regex_t *); + +#endif + Index: usr.sbin/iovctl/iovctl.8 =================================================================== --- /dev/null +++ usr.sbin/iovctl/iovctl.8 @@ -0,0 +1,123 @@ +.\" +.\" Copyright (c) 2014 Sandvine Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: head/usr.sbin/iovctl/iovctl.8 285273 2015-07-08 16:16:44Z pkelsey $ +.\" +.Dd July 8, 2015 +.Dt IOVCTL 8 +.Os +.Sh NAME +.Nm iovctl +.Nd "PCI SR-IOV configuration utility" +.Sh SYNOPSIS +.Nm +.Fl C +.Op Fl f Ar config-file +.Op Fl n +.Nm +.Fl D +.Op Fl f Ar config-file | Fl d Ar device +.Op Fl n +.Nm +.Fl S +.Op Fl f Ar config-file | Fl d Ar device +.Sh DESCRIPTION +The +.Nm +utility creates or destroys PCI Single-Root I/O Virtualization +.Pq SR-IOV +Virtual Functions +.Pq VFs . +When invoked with the +.Fl C +flag, +.Nm +creates VFs as children of the Physical Function +.Pq PF +configured in the specified configuration file. +When invoked with the +.Fl D +flag, +.Nm +destroys all VFs that are children of the specified device. +Available PF devices can be seen in +.Pa /dev/iov/ . +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl C +Enable SR-IOV on the specified PF device and create VF children. +This operation will fail if the PF already has VF children. +This option must be used in conjunction with the +.Fl f +option. +.It Fl d Ar device +Specify the PF device to use for the given operation. +.Ar device +may either be the name of a PF device, or a full path name to a node in +.Pa /dev/iov/ . +This option may not be used with the +.Fl C +option. +.It Fl D +Delete all VF children of the specified PF device. +This operation will fail if SR-IOV is not currently enabled on the specified +device. +.It Fl f Ar config-file +Specify the pathname of the configuration file. +For the +.Fl C +option, this file will be used to specify all configuration values. +For the +.Fl D +and +.Fl S +options, this file will only be used to specify the name of the PF device. +.Pp +See +.Xr iovctl.conf 5 +for a description of the config file format and documentation of the +configuration parameters that apply to all PF drivers. +See the PF driver manual page for configuration parameters specific to +particular hardware. +.It Fl n +Perform a dry-run. 
+Perform all validation of the specified action and print what would be done, +but do not perform the actual creation or destruction of VFs. +This option may not be used with the +.Fl S +flag. +.It Fl S +Read the configuration schema from the specified device and print its contents +to stdout. +This action may be used to discover the configuration parameters supported on +a given PF device. +.El +.Sh SEE ALSO +.Xr iovctl.conf 5 , +.Xr rc.conf 5 +.Sh AUTHORS +This manual page was written by +.An Ryan Stone Aq Mt rstone@FreeBSD.org . Index: usr.sbin/iovctl/iovctl.c =================================================================== --- /dev/null +++ usr.sbin/iovctl/iovctl.c @@ -0,0 +1,403 @@ +/*- + * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/usr.sbin/iovctl/iovctl.c 285063 2015-07-02 21:58:10Z oshogbo $"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iovctl.h" + +static void config_action(const char *filename, int dryrun); +static void delete_action(const char *device, int dryrun); +static void print_schema(const char *device); + +/* + * Fetch the config schema from the kernel via ioctl. This function has to + * call the ioctl twice: the first returns the amount of memory that we need + * to allocate for the schema, and the second actually fetches the schema. + */ +static nvlist_t * +get_schema(int fd) +{ + struct pci_iov_schema arg; + nvlist_t *schema; + int error; + + /* Do the ioctl() once to fetch the size of the schema. */ + arg.schema = NULL; + arg.len = 0; + arg.error = 0; + error = ioctl(fd, IOV_GET_SCHEMA, &arg); + if (error != 0) + err(1, "Could not fetch size of config schema"); + + arg.schema = malloc(arg.len); + if (arg.schema == NULL) + err(1, "Could not allocate %zu bytes for schema", + arg.len); + + /* Now do the ioctl() for real to get the schema. 
*/ + error = ioctl(fd, IOV_GET_SCHEMA, &arg); + if (error != 0 || arg.error != 0) { + if (arg.error != 0) + errno = arg.error; + err(1, "Could not fetch config schema"); + } + + schema = nvlist_unpack(arg.schema, arg.len, NV_FLAG_IGNORE_CASE); + if (schema == NULL) + err(1, "Could not unpack schema"); + + free(arg.schema); + return (schema); +} + +/* + * Call the ioctl that activates SR-IOV and creates the VFs. + */ +static void +config_iov(int fd, const char *dev_name, const nvlist_t *config, int dryrun) +{ + struct pci_iov_arg arg; + int error; + + arg.config = nvlist_pack(config, &arg.len); + if (arg.config == NULL) + err(1, "Could not pack configuration"); + + if (dryrun) { + printf("Would enable SR-IOV on device '%s'.\n", dev_name); + printf( + "The following configuration parameters would be used:\n"); + nvlist_fdump(config, stdout); + printf( + "The configuration parameters consume %zu bytes when packed.\n", + arg.len); + } else { + error = ioctl(fd, IOV_CONFIG, &arg); + if (error != 0) + err(1, "Failed to configure SR-IOV"); + } + + free(arg.config); +} + +static int +open_device(const char *dev_name) +{ + char *dev; + int fd; + size_t copied, size; + long path_max; + + path_max = pathconf("/dev", _PC_PATH_MAX); + if (path_max < 0) + err(1, "Could not get maximum path length"); + + size = path_max; + dev = malloc(size); + if (dev == NULL) + err(1, "Could not allocate memory for device path"); + + if (dev_name[0] == '/') + copied = strlcpy(dev, dev_name, size); + else + copied = snprintf(dev, size, "/dev/iov/%s", dev_name); + + /* >= to account for null terminator. 
*/ + if (copied >= size) + errx(1, "Provided file name too long"); + + fd = open(dev, O_RDWR); + if (fd < 0) + err(1, "Could not open device '%s'", dev); + + free(dev); + return (fd); +} + +static void +usage(void) +{ + + warnx("Usage: iovctl -C -f [-n]"); + warnx(" iovctl -D [-d | -f ] [-n]"); + warnx(" iovctl -S [-d | -f ]"); + exit(1); + +} + +enum main_action { + NONE, + CONFIG, + DELETE, + PRINT_SCHEMA, +}; + +int +main(int argc, char **argv) +{ + char *device; + const char *filename; + int ch, dryrun; + enum main_action action; + + device = NULL; + filename = NULL; + dryrun = 0; + action = NONE; + + while ((ch = getopt(argc, argv, "Cd:Df:nS")) != -1) { + switch (ch) { + case 'C': + if (action != NONE) { + warnx( + "Only one of -C, -D or -S may be specified"); + usage(); + } + action = CONFIG; + break; + case 'd': + device = strdup(optarg); + break; + case 'D': + if (action != NONE) { + warnx( + "Only one of -C, -D or -S may be specified"); + usage(); + } + action = DELETE; + break; + case 'f': + filename = optarg; + break; + case 'n': + dryrun = 1; + break; + case 'S': + if (action != NONE) { + warnx( + "Only one of -C, -D or -S may be specified"); + usage(); + } + action = PRINT_SCHEMA; + break; + case '?': + warnx("Unrecognized argument '-%c'\n", optopt); + usage(); + break; + } + } + + if (device != NULL && filename != NULL) { + warnx("Only one of the -d and -f flags may be specified"); + usage(); + } + + if (device == NULL && filename == NULL) { + warnx("Either the -d or -f flag must be specified"); + usage(); + } + + switch (action) { + case CONFIG: + if (filename == NULL) { + warnx("-d flag cannot be used with the -C flag"); + usage(); + } + config_action(filename, dryrun); + break; + case DELETE: + if (device == NULL) + device = find_device(filename); + delete_action(device, dryrun); + free(device); + break; + case PRINT_SCHEMA: + if (dryrun) { + warnx("-n flag cannot be used with the -S flag"); + usage(); + } + if (device == NULL) + device = 
find_device(filename); + print_schema(device); + free(device); + break; + default: + usage(); + break; + } + + exit(0); +} + +static void +config_action(const char *filename, int dryrun) +{ + char *dev; + nvlist_t *schema, *config; + int fd; + + dev = find_device(filename); + fd = open(dev, O_RDWR); + if (fd < 0) + err(1, "Could not open device '%s'", dev); + + schema = get_schema(fd); + config = parse_config_file(filename, schema); + if (config == NULL) + errx(1, "Could not parse config"); + + config_iov(fd, dev, config, dryrun); + + nvlist_destroy(config); + nvlist_destroy(schema); + free(dev); + close(fd); +} + +static void +delete_action(const char *dev_name, int dryrun) +{ + int fd, error; + + fd = open_device(dev_name); + + if (dryrun) + printf("Would attempt to delete all VF children of '%s'\n", + dev_name); + else { + error = ioctl(fd, IOV_DELETE); + if (error != 0) + err(1, "Failed to delete VFs"); + } + + close(fd); +} + +static void +print_default_value(const nvlist_t *parameter, const char *type) +{ + const uint8_t *mac; + size_t size; + + if (strcasecmp(type, "bool") == 0) + printf(" (default = %s)", + nvlist_get_bool(parameter, DEFAULT_SCHEMA_NAME) ? 
"true" : + "false"); + else if (strcasecmp(type, "string") == 0) + printf(" (default = %s)", + nvlist_get_string(parameter, DEFAULT_SCHEMA_NAME)); + else if (strcasecmp(type, "uint8_t") == 0) + printf(" (default = %ju)", + (uintmax_t)nvlist_get_number(parameter, + DEFAULT_SCHEMA_NAME)); + else if (strcasecmp(type, "uint16_t") == 0) + printf(" (default = %ju)", + (uintmax_t)nvlist_get_number(parameter, + DEFAULT_SCHEMA_NAME)); + else if (strcasecmp(type, "uint32_t") == 0) + printf(" (default = %ju)", + (uintmax_t)nvlist_get_number(parameter, + DEFAULT_SCHEMA_NAME)); + else if (strcasecmp(type, "uint64_t") == 0) + printf(" (default = %ju)", + (uintmax_t)nvlist_get_number(parameter, + DEFAULT_SCHEMA_NAME)); + else if (strcasecmp(type, "unicast-mac") == 0) { + mac = nvlist_get_binary(parameter, DEFAULT_SCHEMA_NAME, &size); + printf(" (default = %02x:%02x:%02x:%02x:%02x:%02x)", mac[0], + mac[1], mac[2], mac[3], mac[4], mac[5]); + } else + errx(1, "Unexpected type in schema: '%s'", type); +} + +static void +print_subsystem_schema(const nvlist_t * subsystem_schema) +{ + const char *name, *type; + const nvlist_t *parameter; + void *it; + int nvtype; + + it = NULL; + while ((name = nvlist_next(subsystem_schema, &nvtype, &it)) != NULL) { + parameter = nvlist_get_nvlist(subsystem_schema, name); + type = nvlist_get_string(parameter, TYPE_SCHEMA_NAME); + + printf("\t%s : %s", name, type); + if (dnvlist_get_bool(parameter, REQUIRED_SCHEMA_NAME, false)) + printf(" (required)"); + else if (nvlist_exists(parameter, DEFAULT_SCHEMA_NAME)) + print_default_value(parameter, type); + else + printf(" (optional)"); + printf("\n"); + } +} + +static void +print_schema(const char *dev_name) +{ + nvlist_t *schema; + const nvlist_t *iov_schema, *driver_schema, *pf_schema, *vf_schema; + int fd; + + fd = open_device(dev_name); + schema = get_schema(fd); + + pf_schema = nvlist_get_nvlist(schema, PF_CONFIG_NAME); + iov_schema = nvlist_get_nvlist(pf_schema, IOV_CONFIG_NAME); + driver_schema = 
nvlist_get_nvlist(pf_schema, DRIVER_CONFIG_NAME); + printf( +"The following configuration parameters may be configured on the PF:\n"); + print_subsystem_schema(iov_schema); + print_subsystem_schema(driver_schema); + + vf_schema = nvlist_get_nvlist(schema, VF_SCHEMA_NAME); + iov_schema = nvlist_get_nvlist(vf_schema, IOV_CONFIG_NAME); + driver_schema = nvlist_get_nvlist(vf_schema, DRIVER_CONFIG_NAME); + printf( +"\nThe following configuration parameters may be configured on a VF:\n"); + print_subsystem_schema(iov_schema); + print_subsystem_schema(driver_schema); + + nvlist_destroy(schema); + close(fd); +} Index: usr.sbin/iovctl/iovctl.conf.5 =================================================================== --- /dev/null +++ usr.sbin/iovctl/iovctl.conf.5 @@ -0,0 +1,171 @@ +.\" +.\" Copyright (c) 2014 Sandvine Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: head/usr.sbin/iovctl/iovctl.conf.5 285273 2015-07-08 16:16:44Z pkelsey $ +.\" +.Dd July 8, 2015 +.Dt IOVCTL.CONF 5 +.Os +.Sh NAME +.Nm iovctl.conf +.Nd IOVCTL configuration file +.Sh DESCRIPTION +The +.Nm +file is the configuration file for the +.Xr iovctl 8 +program. +This file specifies configuration parameters for a single Physical Function +.Pq PF +device. +To configure SR-IOV on multiple PF devices, use one configuration file for each +PF. +The locations of all +.Xr iovctl 8 +configuration files are specified in +.Xr rc.conf 5 . +.Pp +The +.Nm +file uses UCL format. +UCL syntax is documented at the official UCL website: +http://github.com/vstakhov/libucl. +.Pp +There are three types of sections in the +.Nm +file. +A section is a key at the top level of the file with a list as its value. +The list may contain the keys specified in the +.Sx OPTIONS +section of this manual page. +Individual PF driver implementations may specify additional device-specific +configuration keys that they will accept. +The order in which sections appear in +.Nm +is ignored. +No two sections may have the same key. +For example, two sections for VF-1 must not be defined. +.Pp +The first section type is the PF section. +This section always has the key "PF"; therefore, only one such section may be +defined. +This section defines configuration parameters that apply to the PF as a whole. +.Pp +The second section type is the VF section. 
+This section has the key "VF-" followed by a VF index. +VF indices start at 0 and always increment by 1. +Valid VF indices are in the range of 0 to +.Pq num_vfs - 1 . +The VF index must be given as a decimal integer with no leading zeros. +This section defines configuration parameters that apply to a single VF. +.Pp +The third section type is the default section. +This section always has the key "DEFAULT"; therefore, only one such section may +be specified. +This section defines default configuration parameters that apply to all VFs. +All configuration keys that are valid to be applied to a VF are valid in this +section. +An individual VF section may override a default specified in this section by +providing a different value for the configuration parameter. +Note that the default section applies to ALL VFs. +The default section must appear before any VF sections. +The default section may appear before or after the PF section. +.Pp +The following option types are supported: +.Bl -tag -width indent +.It boolean +Accepts a boolean value of true or false. +.It mac-addr +Accepts a unicast MAC address specified as a string of the form +xx:xx:xx:xx:xx:xx, where xx is one or two hexadecimal digits. +.It string +Accepts any string value. +.It uint8_t +Accepts any integer in the range 0 to 255, inclusive. +.It uint16_t +Accepts any integer in the range 0 to 65535, inclusive. +.It uint32_t +Accepts any integer in the range 0 to +.Pq 2**32 - 1 , +inclusive. +.It uint64_t +Accepts any integer in the range 0 to +.Pq 2**64 - 1 , +inclusive. +.El +.Sh OPTIONS +The following parameters are accepted by all PF drivers: +.Bl -tag -width indent +.It device Pq string +This parameter specifies the name of the PF device. +This parameter is required to be specified. +.It num_vfs Pq uint16_t +This parameter specifies the number of VF children to create. +This parameter may not be zero. +The maximum value of this parameter is device-specific. 
+.El +.Pp +The following parameters are accepted by all VFs: +.Bl -tag -width indent +.It passthrough Pq boolean +This parameter controls whether the VF is reserved for the use of the +.Xr bhyve 8 +hypervisor as a PCI passthrough device. +If this parameter is set to true, then the VF will be reserved as a PCI +passthrough device and it will not be accessible from the host OS. +The default value of this parameter is false. +.El +.Pp +See the PF driver manual page for configuration parameters specific to +particular hardware. +.Sh EXAMPLES +This sample file will create 3 VFs as children of the ix0 device. +VF-1 and VF-2 are set as +.Xr bhyve 8 +passthrough devices through the use of the default section. +VF-0 is not configured as a passthrough device as it explicitly overrides the +default. +VF-0 also sets a device-specific parameter named mac-addr. +.Bd -literal -offset indent +PF { + device : "ix0"; + num_vfs : 3; +} + +DEFAULT { + passthrough : true; +} + +VF-0 { + mac-addr : "02:56:48:7e:d9:f7"; + passthrough : false; +} +.Ed +.Sh SEE ALSO +.Xr rc.conf 5 , +.Xr iovctl 8 +.Sh AUTHORS +This manual page was written by +.An Ryan Stone Aq Mt rstone@FreeBSD.org . Index: usr.sbin/iovctl/parse.c =================================================================== --- /dev/null +++ usr.sbin/iovctl/parse.c @@ -0,0 +1,416 @@ +/*- + * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/usr.sbin/iovctl/parse.c 285063 2015-07-02 21:58:10Z oshogbo $"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iovctl.h" + +/* + * Reports that the value given for 'key' in the config file does not have the + * expected 'type', then terminates the program with exit status 1 via errx(). + * This function does not return. + */ +static void +report_config_error(const char *key, const ucl_object_t *obj, const char *type) +{ + + errx(1, "Value '%s' of key '%s' is not of type %s", + ucl_object_tostring(obj), key, type); +} + +/* + * Verifies that the value specified in the config file is a boolean value, and + * then adds the value to the configuration. + */ +static void +add_bool_config(const char *key, const ucl_object_t *obj, nvlist_t *config) +{ + bool val; + + /* A value that is not a boolean is fatal; see report_config_error(). 
*/ + if (!ucl_object_toboolean_safe(obj, &val)) + report_config_error(key, obj, "bool"); + + nvlist_add_bool(config, key, val); +} + +/* + * Verifies that the value specified in the config file is a string, and then + * adds the value to the configuration. 
+ */
+static void
+add_string_config(const char *key, const ucl_object_t *obj, nvlist_t *config)
+{
+	const char *str;
+
+	/* A value that is not a string is fatal; the error helper exits. */
+	if (ucl_object_tostring_safe(obj, &str) == false)
+		report_config_error(key, obj, "string");
+	nvlist_add_string(config, key, str);
+}
+
+/*
+ * Checks that the config file value is an integer in the range [0, max] and,
+ * if so, stores it in the configuration under 'key'.  'type' is only used to
+ * name the expected type in the error message.
+ */
+static void
+add_uint_config(const char *key, const ucl_object_t *obj, nvlist_t *config,
+    const char *type, uint64_t max)
+{
+	int64_t parsed;
+
+	/*
+	 * libucl only provides a signed integer conversion, so parse into an
+	 * int64_t and reject negative values before comparing against the
+	 * unsigned limit.  Any failure is fatal.
+	 */
+	if (!ucl_object_toint_safe(obj, &parsed) || parsed < 0 ||
+	    (uint64_t)parsed > max)
+		report_config_error(key, obj, type);
+
+	nvlist_add_number(config, key, (uint64_t)parsed);
+}
+
+/*
+ * Verifies that the value specified in the config file is a unicast MAC
+ * address, and then adds the value to the configuration.
+ */
+static void
+add_unicast_mac_config(const char *key, const ucl_object_t *obj, nvlist_t *config)
+{
+	uint8_t mac[ETHER_ADDR_LEN];
+	const char *val;
+	char *copy, *cursor, *token, *endpos;
+	size_t len;
+	u_long value;
+	int i;
+
+	/*
+	 * The address must consist of exactly ETHER_ADDR_LEN colon-separated
+	 * fields of one or two hexadecimal digits.  Every malformed input is
+	 * fatal: the error helpers used below exit and do not return.
+	 */
+	if (!ucl_object_tostring_safe(obj, &val))
+		report_config_error(key, obj, "unicast-mac");
+
+	/* strsep() modifies its input, so tokenize a private copy. */
+	copy = strdup(val);
+	if (copy == NULL)
+		err(1, "Could not allocate memory for MAC address");
+
+	/*
+	 * Use strsep() rather than strtok_r(): strtok_r() silently skips empty
+	 * fields, so a value such as "02::56:48:7e:d9:f7" or one with a
+	 * leading or trailing ':' would be accepted.  strsep() returns empty
+	 * fields, which the length check below rejects.
+	 */
+	i = 0;
+	cursor = copy;
+	while ((token = strsep(&cursor, ":")) != NULL) {
+		len = strlen(token);
+		if (len < 1 || len > 2)
+			report_config_error(key, obj, "unicast-mac");
+
+		value = strtoul(token, &endpos, 16);
+
+		/* Trailing characters that are not hex digits. */
+		if (*endpos != '\0')
+			report_config_error(key, obj, "unicast-mac");
+
+		if (value > UINT8_MAX)
+			report_config_error(key, obj, "unicast-mac");
+
+		/* More fields than a MAC address can hold. */
+		if (i >= ETHER_ADDR_LEN)
+			report_config_error(key, obj, "unicast-mac");
+
+		mac[i] = value;
+		i++;
+	}
+
+	free(copy);
+
+	/* Fewer fields than a MAC address requires. */
+	if (i != ETHER_ADDR_LEN)
+		report_config_error(key, obj, "unicast-mac");
+
+	if (ETHER_IS_MULTICAST(mac))
+		errx(1, "Value '%s' of key '%s' is a multicast address",
+		    ucl_object_tostring(obj), key);
+
+	nvlist_add_binary(config, key, mac, ETHER_ADDR_LEN);
+}
+
+/*
+ * Validates that the given configuration value has the right type as specified
+ * in the schema, and then adds the value to the configuration node.
+ */
+static void
+add_config(const char *key, const ucl_object_t *obj, nvlist_t *config,
+    const nvlist_t *schema)
+{
+	/* Unsigned integer schema types and the largest value each accepts. */
+	static const struct {
+		const char *name;
+		uint64_t max;
+	} uint_types[] = {
+		{ "uint8_t", UINT8_MAX },
+		{ "uint16_t", UINT16_MAX },
+		{ "uint32_t", UINT32_MAX },
+		{ "uint64_t", UINT64_MAX },
+	};
+	const char *type;
+	size_t i;
+
+	/* The schema names the expected type; names compare case-insensitively. */
+	type = nvlist_get_string(schema, TYPE_SCHEMA_NAME);
+
+	if (strcasecmp(type, "bool") == 0) {
+		add_bool_config(key, obj, config);
+		return;
+	}
+	if (strcasecmp(type, "string") == 0) {
+		add_string_config(key, obj, config);
+		return;
+	}
+	for (i = 0; i < sizeof(uint_types) / sizeof(uint_types[0]); i++) {
+		if (strcasecmp(type, uint_types[i].name) == 0) {
+			add_uint_config(key, obj, config, type,
+			    uint_types[i].max);
+			return;
+		}
+	}
+	if (strcasecmp(type, "unicast-mac") == 0) {
+		add_unicast_mac_config(key, obj, config);
+		return;
+	}
+
+	errx(1, "Unexpected type '%s' in schema", type);
+}
+
+/*
+ * Parses all values specified in a device section in the configuration file,
+ * validates that the key/value pair is valid in the schema, and then adds
+ * the key/value pair to the correct subsystem in the config.
+ */
+static void
+parse_device_config(const ucl_object_t *top, nvlist_t *config,
+    const char *subsystem, const nvlist_t *schema)
+{
+	ucl_object_iter_t iter;
+	const ucl_object_t *entry;
+	nvlist_t *section, *driver_cfg, *iov_cfg, *dest;
+	const nvlist_t *driver_schema, *iov_schema, *src;
+	const char *name;
+
+	/* Refuse to define the same section twice. */
+	if (nvlist_exists(config, subsystem))
+		errx(1, "Multiple definitions of '%s' in config file",
+		    subsystem);
+
+	driver_schema = nvlist_get_nvlist(schema, DRIVER_CONFIG_NAME);
+	iov_schema = nvlist_get_nvlist(schema, IOV_CONFIG_NAME);
+
+	if ((driver_cfg = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL)
+		err(1, "Could not allocate config nvlist");
+	if ((iov_cfg = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL)
+		err(1, "Could not allocate config nvlist");
+	if ((section = nvlist_create(NV_FLAG_IGNORE_CASE)) == NULL)
+		err(1, "Could not allocate config nvlist");
+
+	for (iter = NULL;
+	    (entry = ucl_iterate_object(top, &iter, true)) != NULL;) {
+		name = ucl_object_key(entry);
+
+		/*
+		 * Pick the schema that knows this key; the IOV schema is
+		 * consulted before the driver schema.  A key known to neither
+		 * is fatal.
+		 */
+		if (nvlist_exists_nvlist(iov_schema, name)) {
+			src = iov_schema;
+			dest = iov_cfg;
+		} else if (nvlist_exists_nvlist(driver_schema, name)) {
+			src = driver_schema;
+			dest = driver_cfg;
+		} else
+			errx(1, "%s: Invalid config key '%s'", subsystem, name);
+
+		add_config(name, entry, dest, nvlist_get_nvlist(src, name));
+	}
+
+	/* Hand both sub-lists to the section, then the section to config. */
+	nvlist_move_nvlist(section, DRIVER_CONFIG_NAME, driver_cfg);
+	nvlist_move_nvlist(section, IOV_CONFIG_NAME, iov_cfg);
+	nvlist_move_nvlist(config, subsystem, section);
+}
+
+/*
+ * Parses the specified config file using the given schema, and returns an
+ * nvlist containing the configuration specified by the file.
+ *
+ * Exits with a message to stderr and an error if any config validation fails.
+ */
+nvlist_t *
+parse_config_file(const char *filename, const nvlist_t *schema)
+{
+	ucl_object_iter_t it;
+	struct ucl_parser *parser;
+	ucl_object_t *top;
+	const ucl_object_t *obj;
+	nvlist_t *config;
+	const nvlist_t *pf_schema, *vf_schema;
+	const char *errmsg, *key;
+	regex_t vf_pat;
+	int regex_err, processed_vf;
+
+	/*
+	 * Matches VF section names: VF_PREFIX followed by a decimal index with
+	 * no leading zeros.  The index is the first capture group.
+	 */
+	regex_err = regcomp(&vf_pat, "^"VF_PREFIX"([1-9][0-9]*|0)$",
+	    REG_EXTENDED | REG_ICASE);
+	if (regex_err != 0)
+		errx(1, "Could not compile VF regex");
+
+	parser = ucl_parser_new(0);
+	if (parser == NULL)
+		err(1, "Could not allocate parser");
+
+	if (!ucl_parser_add_file(parser, filename))
+		err(1, "Could not open '%s' for reading", filename);
+
+	errmsg = ucl_parser_get_error(parser);
+	if (errmsg != NULL)
+		errx(1, "Could not parse '%s': %s", filename, errmsg);
+
+	config = nvlist_create(NV_FLAG_IGNORE_CASE);
+	if (config == NULL)
+		err(1, "Could not allocate config nvlist");
+
+	pf_schema = nvlist_get_nvlist(schema, PF_CONFIG_NAME);
+	vf_schema = nvlist_get_nvlist(schema, VF_SCHEMA_NAME);
+
+	/* Walk the top-level sections and dispatch each one by its key. */
+	processed_vf = 0;
+	top = ucl_parser_get_object(parser);
+	it = NULL;
+	while ((obj = ucl_iterate_object(top, &it, true)) != NULL) {
+		key = ucl_object_key(obj);
+
+		if (strcasecmp(key, PF_CONFIG_NAME) == 0)
+			parse_device_config(obj, config, key, pf_schema);
+		else if (strcasecmp(key, DEFAULT_SCHEMA_NAME) == 0) {
+			/*
+			 * Enforce that the default section must come before all
+			 * VF sections.  This will hopefully prevent confusing
+			 * the user by having a default value apply to a VF
+			 * that was declared earlier in the file.
+			 *
+			 * This also gives us the flexibility to extend the file
+			 * format in the future to allow for multiple default
+			 * sections that do only apply to subsequent VF
+			 * sections.
+			 */
+			if (processed_vf)
+				errx(1,
+			"'default' section must precede all VF sections");
+
+			parse_device_config(obj, config, key, vf_schema);
+		} else if (regexec(&vf_pat, key, 0, NULL, 0) == 0) {
+			processed_vf = 1;
+			parse_device_config(obj, config, key, vf_schema);
+		} else
+			errx(1, "Unexpected top-level node: %s", key);
+	}
+
+	validate_config(config, schema, &vf_pat);
+
+	ucl_object_unref(top);
+	ucl_parser_free(parser);
+	regfree(&vf_pat);
+
+	return (config);
+}
+
+/*
+ * Searches the PF configuration section for the "device" parameter and returns
+ * its value, or NULL if the section does not specify a device.
+ *
+ * Exits with an error message if the device parameter is not a string.
+ */
+static const char *
+find_pf_device(const ucl_object_t *pf)
+{
+	ucl_object_iter_t it;
+	const ucl_object_t *obj;
+	const char *key, *device;
+
+	it = NULL;
+	while ((obj = ucl_iterate_object(pf, &it, true)) != NULL) {
+		key = ucl_object_key(obj);
+
+		if (strcasecmp(key, "device") == 0) {
+			/*
+			 * A type mismatch does not set errno, so report it
+			 * with errx() rather than err(), which would append an
+			 * unrelated strerror(errno) to the message.
+			 */
+			if (!ucl_object_tostring_safe(obj, &device))
+				errx(1,
+				    "Config PF.device must be a string");
+
+			return (device);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Manually parse the config file looking for the name of the PF device.  We
+ * have to do this separately because we need the config schema to call the
+ * normal config file parsing code, and we need to know the name of the PF
+ * device so that we can fetch the schema from it.
+ *
+ * This will always exit on failure, so if it returns then it is guaranteed to
+ * have returned a valid device name.
+ */
+char *
+find_device(const char *filename)
+{
+	struct ucl_parser *parser;
+	ucl_object_t *root;
+	const ucl_object_t *section;
+	ucl_object_iter_t iter;
+	const char *pf_name, *parse_err;
+	char *path;
+
+	path = NULL;
+	pf_name = NULL;
+
+	parser = ucl_parser_new(0);
+	if (parser == NULL)
+		err(1, "Could not allocate parser");
+
+	if (!ucl_parser_add_file(parser, filename))
+		err(1, "Could not open '%s' for reading", filename);
+
+	parse_err = ucl_parser_get_error(parser);
+	if (parse_err != NULL)
+		errx(1, "Could not parse '%s': %s", filename, parse_err);
+
+	/* Only the first PF section is examined for the device name. */
+	root = ucl_parser_get_object(parser);
+	for (iter = NULL;
+	    (section = ucl_iterate_object(root, &iter, true)) != NULL;) {
+		if (strcasecmp(ucl_object_key(section), PF_CONFIG_NAME) != 0)
+			continue;
+
+		pf_name = find_pf_device(section);
+		break;
+	}
+
+	if (pf_name == NULL)
+		errx(1, "Config file does not specify device");
+
+	/*
+	 * Build the path before releasing the parsed object below.  The caller
+	 * owns the returned string and must free() it.
+	 */
+	if (asprintf(&path, "/dev/iov/%s", pf_name) < 0)
+		err(1, "Could not allocate memory for device");
+
+	ucl_object_unref(root);
+	ucl_parser_free(parser);
+
+	return (path);
+}
Index: usr.sbin/iovctl/validate.c
===================================================================
--- /dev/null
+++ usr.sbin/iovctl/validate.c
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 2014-2015 Sandvine Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/usr.sbin/iovctl/validate.c 285063 2015-07-02 21:58:10Z oshogbo $"); + +#include +#include +#include +#include + +#include +#include +#include + +#include "iovctl.h" + +/* + * Returns a writeable pointer to the configuration for the given device. + * If no configuration exists, a new nvlist with empty driver and iov + * sections is allocated and returned. + * + * Returning a writeable pointer requires removing the configuration from config + * using nvlist_take. It is the responsibility of the caller to re-insert the + * nvlist in config with nvlist_move_nvlist. 
+ */
+static nvlist_t *
+find_config(nvlist_t *config, const char * device)
+{
+	nvlist_t *found, *driver, *iov;
+
+	/* Detach an existing section from config, if there is one. */
+	found = dnvlist_take_nvlist(config, device, NULL);
+	if (found == NULL) {
+		/* None configured: build a section with two empty sub-lists. */
+		driver = nvlist_create(NV_FLAG_IGNORE_CASE);
+		if (driver == NULL)
+			err(1, "Could not allocate config nvlist");
+
+		iov = nvlist_create(NV_FLAG_IGNORE_CASE);
+		if (iov == NULL)
+			err(1, "Could not allocate config nvlist");
+
+		found = nvlist_create(NV_FLAG_IGNORE_CASE);
+		if (found == NULL)
+			err(1, "Could not allocate config nvlist");
+
+		nvlist_move_nvlist(found, DRIVER_CONFIG_NAME, driver);
+		nvlist_move_nvlist(found, IOV_CONFIG_NAME, iov);
+	}
+
+	return (found);
+}
+
+/*
+ * Converts the VF index captured in matches[1] of a VF section name into a
+ * number.  Exits with an error if the index does not fit in 16 bits.
+ */
+static uint16_t
+parse_vf_num(const char *key, regmatch_t *matches)
+{
+	const char *digits;
+	u_long index;
+
+	digits = key + matches[1].rm_so;
+	index = strtoul(digits, NULL, 10);
+	if (index > UINT16_MAX)
+		errx(1, "VF number %lu is too large to be valid",
+		    index);
+
+	return (index);
+}
+
+/*
+ * Apply the default values specified in device_defaults to the specified
+ * subsystem in the given device_config.
+ *
+ * This function assumes that the values specified in device_defaults have
+ * already been validated.
+ */
+static void
+apply_subsystem_defaults(nvlist_t *device_config, const char *subsystem,
+    const nvlist_t *device_defaults)
+{
+	nvlist_t *target;
+	const nvlist_t *source;
+	const char *param;
+	const void *blob;
+	void *iter;
+	size_t blob_len;
+	int kind;
+
+	/* Detach the subsystem list for writing; it is re-inserted below. */
+	target = nvlist_take_nvlist(device_config, subsystem);
+	source = nvlist_get_nvlist(device_defaults, subsystem);
+
+	for (iter = NULL;
+	    (param = nvlist_next(source, &kind, &iter)) != NULL;) {
+		/* A value already present in the device config is kept. */
+		if (nvlist_exists(target, param))
+			continue;
+
+		/* Copy the default using the accessor for its value type. */
+		switch (kind) {
+		case NV_TYPE_BOOL:
+			nvlist_add_bool(target, param,
+			    nvlist_get_bool(source, param));
+			break;
+		case NV_TYPE_NUMBER:
+			nvlist_add_number(target, param,
+			    nvlist_get_number(source, param));
+			break;
+		case NV_TYPE_STRING:
+			nvlist_add_string(target, param,
+			    nvlist_get_string(source, param));
+			break;
+		case NV_TYPE_NVLIST:
+			nvlist_add_nvlist(target, param,
+			    nvlist_get_nvlist(source, param));
+			break;
+		case NV_TYPE_BINARY:
+			blob = nvlist_get_binary(source, param, &blob_len);
+			nvlist_add_binary(target, param, blob, blob_len);
+			break;
+		default:
+			errx(1, "Unexpected type '%d'", kind);
+		}
+	}
+
+	nvlist_move_nvlist(device_config, subsystem, target);
+}
+
+/*
+ * Applies the defaults to both subsystems of the given VF device, driver
+ * first and then iov, filling in only parameters that have no value yet.
+ *
+ * This function assumes that the values specified in defaults have already been
+ * validated.
+ */
+static void
+apply_defaults(nvlist_t *vf, const nvlist_t *defaults)
+{
+
+	apply_subsystem_defaults(vf, DRIVER_CONFIG_NAME, defaults);
+	apply_subsystem_defaults(vf, IOV_CONFIG_NAME, defaults);
+}
+
+/*
+ * Validate that all required parameters have been configured in the specified
+ * subsystem.
+ */
+static void
+validate_subsystem(const nvlist_t *device, const nvlist_t *device_schema,
+    const char *subsystem_name, const char *config_name)
+{
+	const nvlist_t *params, *subsys_schema, *param_schema;
+	const char *name;
+	void *iter;
+	int kind;
+
+	params = nvlist_get_nvlist(device, subsystem_name);
+	subsys_schema = nvlist_get_nvlist(device_schema, subsystem_name);
+
+	/* Walk the schema, not the config: missing entries are the concern. */
+	for (iter = NULL;
+	    (name = nvlist_next(subsys_schema, &kind, &iter)) != NULL;) {
+		param_schema = nvlist_get_nvlist(subsys_schema, name);
+
+		if (dnvlist_get_bool(param_schema, REQUIRED_SCHEMA_NAME,
+		    false) && !nvlist_exists(params, name))
+			errx(1,
+			    "Required parameter '%s' not found in '%s'",
+			    name, config_name);
+	}
+}
+
+/*
+ * Checks both subsystems of the device, driver first and then iov, for
+ * required parameters that were not configured.
+ */
+static void
+validate_device(const nvlist_t *device, const nvlist_t *schema,
+    const char *config_name)
+{
+
+	validate_subsystem(device, schema, DRIVER_CONFIG_NAME, config_name);
+	validate_subsystem(device, schema, IOV_CONFIG_NAME, config_name);
+}
+
+/*
+ * Returns the "num_vfs" value stored in the iov subsystem of the PF section.
+ */
+static uint16_t
+get_num_vfs(const nvlist_t *pf)
+{
+
+	return (nvlist_get_number(nvlist_get_nvlist(pf, IOV_CONFIG_NAME),
+	    "num_vfs"));
+}
+
+/*
+ * Validates the configuration that has been parsed into config using the given
+ * config schema.  Note that the parser is required to not insert configuration
+ * keys that are not valid in the schema, and to not insert configuration values
+ * that are of the incorrect type.  Therefore this function will not validate
+ * either condition.  This function is only responsible for inserting config
+ * file defaults in individual VF sections and removing the DEFAULT_SCHEMA_NAME
+ * subsystem from config, validating that all required parameters in the schema
+ * are present in each PF and VF subsystem, and that there is no VF subsystem
+ * section whose number exceeds num_vfs.
+ */
+void
+validate_config(nvlist_t *config, const nvlist_t *schema, const regex_t *vf_pat)
+{
+	char device_name[VF_MAX_NAME];
+	regmatch_t matches[2];
+	nvlist_t *defaults, *pf, *vf;
+	const nvlist_t *vf_schema;
+	const char *key;
+	void *cookie;
+	int i, type;
+	uint16_t vf_num, num_vfs;
+
+	pf = find_config(config, PF_CONFIG_NAME);
+	validate_device(pf, nvlist_get_nvlist(schema, PF_CONFIG_NAME),
+	    PF_CONFIG_NAME);
+
+	/*
+	 * Read num_vfs while we still own pf.  nvlist_move_nvlist() takes
+	 * ownership of the list and destroys it if the insertion fails, so pf
+	 * must not be dereferenced after the move.
+	 */
+	num_vfs = get_num_vfs(pf);
+	nvlist_move_nvlist(config, PF_CONFIG_NAME, pf);
+
+	vf_schema = nvlist_get_nvlist(schema, VF_SCHEMA_NAME);
+
+	if (num_vfs == 0)
+		errx(1, "PF.num_vfs must be at least 1");
+
+	/* Remove the DEFAULT section from config; it is applied per VF below. */
+	defaults = dnvlist_take_nvlist(config, DEFAULT_SCHEMA_NAME, NULL);
+
+	/*
+	 * Make sure every VF in [0, num_vfs) has a section, fill in the
+	 * defaults for parameters it does not set, and check that all required
+	 * parameters are present.
+	 */
+	for (i = 0; i < num_vfs; i++) {
+		snprintf(device_name, sizeof(device_name), VF_PREFIX"%d",
+		    i);
+
+		vf = find_config(config, device_name);
+
+		if (defaults != NULL)
+			apply_defaults(vf, defaults);
+
+		validate_device(vf, vf_schema, device_name);
+		nvlist_move_nvlist(config, device_name, vf);
+	}
+	nvlist_destroy(defaults);
+
+	/* Reject VF sections whose index is not below num_vfs. */
+	cookie = NULL;
+	while ((key = nvlist_next(config, &type, &cookie)) != NULL) {
+		if (regexec(vf_pat, key, nitems(matches), matches, 0) == 0) {
+			vf_num = parse_vf_num(key, matches);
+			if (vf_num >= num_vfs)
+				errx(1,
+				    "VF number %d is out of bounds (num_vfs=%d)",
+				    vf_num, num_vfs);
+		}
+	}
+}
+
Index: usr.sbin/pciconf/cap.c =================================================================== --- usr.sbin/pciconf/cap.c +++ usr.sbin/pciconf/cap.c @@ -30,13 +30,14 @@ #ifndef lint static const char rcsid[] = - "$FreeBSD$"; + "$FreeBSD: head/usr.sbin/pciconf/cap.c 290412 2015-11-05 20:24:56Z jhb $"; #endif /* not lint */ #include #include #include +#include #include #include @@ -640,7 +641,7 @@ printf(" %d fatal", bitcount32(sta & mask)); printf(" %d non-fatal", bitcount32(sta & ~mask)); sta = read_config(fd, &p->pc_sel, ptr + PCIR_AER_COR_STATUS, 4); - printf(" %d corrected", bitcount32(sta)); + printf(" %d corrected\n", bitcount32(sta)); } static void @@ 
-656,6 +657,7 @@ if ((cap1 & PCIM_VC_CAP1_LOWPRI_EXT_COUNT) != 0) printf(" lowpri VC0-VC%d", (cap1 & PCIM_VC_CAP1_LOWPRI_EXT_COUNT) >> 4); + printf("\n"); } static void @@ -668,7 +670,7 @@ return; low = read_config(fd, &p->pc_sel, ptr + PCIR_SERIAL_LOW, 4); high = read_config(fd, &p->pc_sel, ptr + PCIR_SERIAL_HIGH, 4); - printf(" %08x%08x", high, low); + printf(" %08x%08x\n", high, low); } static void @@ -680,7 +682,7 @@ if (ver < 1) return; val = read_config(fd, &p->pc_sel, ptr + 4, 4); - printf(" ID %d", val & 0xffff); + printf(" ID %d\n", val & 0xffff); } static void @@ -692,7 +694,69 @@ if (ver < 1) return; val = read_config(fd, &p->pc_sel, ptr + 8, 4); - printf(" lane errors %#x", val); + printf(" lane errors %#x\n", val);
+}
+
+/*
+ * Maps a flag value to the word printed for it: "enabled" for any non-zero
+ * value, "disabled" for zero.
+ */
+static const char *
+check_enabled(int value)
+{
+
+	return (value ? "enabled" : "disabled");
+}
+
+/*
+ * Prints the SR-IOV extended capability found at config offset 'ptr': the
+ * control bits, VF counts, VF RID offset and stride, VF device ID, the
+ * supported page sizes (marking the one currently selected) and the VF BARs.
+ */
+static void
+ecap_sriov(int fd, struct pci_conf *p, uint16_t ptr, uint8_t ver)
+{
+	const char *comma, *enabled;
+	uint16_t iov_ctl, total_vfs, num_vfs, vf_offset, vf_stride, vf_did;
+	uint32_t page_caps, page_size, page_shift, page_bit;
+	unsigned long long size;
+	int i;
+
+	printf("SR-IOV %d ", ver);
+
+	iov_ctl = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_CTL, 2);
+	printf("IOV %s, Memory Space %s, ARI %s\n",
+	    check_enabled(iov_ctl & PCIM_SRIOV_VF_EN),
+	    check_enabled(iov_ctl & PCIM_SRIOV_VF_MSE),
+	    check_enabled(iov_ctl & PCIM_SRIOV_ARI_EN));
+
+	total_vfs = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_TOTAL_VFS, 2);
+	num_vfs = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_NUM_VFS, 2);
+	printf(" ");
+	printf("%d VFs configured out of %d supported\n", num_vfs, total_vfs);
+
+	vf_offset = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_VF_OFF, 2);
+	vf_stride = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_VF_STRIDE, 2);
+	printf(" ");
+	printf("First VF RID Offset 0x%04x, VF RID Stride 0x%04x\n", vf_offset,
+	    vf_stride);
+
+	vf_did = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_VF_DID, 2);
+	printf(" VF Device ID 0x%04x\n", vf_did);
+
+	page_caps = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_PAGE_CAP, 4);
+	page_size = read_config(fd, &p->pc_sel, ptr + PCIR_SRIOV_PAGE_SIZE, 4);
+	printf(" ");
+	printf("Page Sizes: ");
+	comma = "";
+	/* Visit the set bits of page_caps from lowest to highest. */
+	while (page_caps != 0) {
+		page_shift = ffs(page_caps) - 1;
+		/* Unsigned shift: bit 31 must not go through a signed int. */
+		page_bit = (uint32_t)1 << page_shift;
+
+		/*
+		 * Test only the bit being printed.  Testing the whole
+		 * remaining mask would mark every size up to the selected
+		 * one as enabled.
+		 */
+		if ((page_size & page_bit) != 0)
+			enabled = " (enabled)";
+		else
+			enabled = "";
+
+		/* Computed in 64 bits: the shift can reach 32 or more. */
+		size = 1ULL << (page_shift + PCI_SRIOV_BASE_PAGE_SHIFT);
+		printf("%s%llu%s", comma, size, enabled);
+		comma = ", ";
+
+		page_caps &= ~page_bit;
+	}
+	printf("\n");
+
+	for (i = 0; i <= PCIR_MAX_BAR_0; i++)
+		print_bar(fd, p, "iov bar ", ptr + PCIR_SRIOV_BAR(i));
 } struct { @@ -708,7 +772,6 @@ { PCIZ_ACS, "ACS" }, { PCIZ_ARI, "ARI" }, { PCIZ_ATS, "ATS" }, - { PCIZ_SRIOV, "SRIOV" }, { PCIZ_MULTICAST, "Multicast" }, { PCIZ_RESIZE_BAR, "Resizable BAR" }, { PCIZ_DPA, "DPA" }, @@ -747,6 +810,9 @@ case PCIZ_SEC_PCIE: ecap_sec_pcie(fd, p, ptr, PCI_EXTCAP_VER(ecap)); break; + case PCIZ_SRIOV: + ecap_sriov(fd, p, ptr, PCI_EXTCAP_VER(ecap)); + break; default: name = "unknown"; for (i = 0; ecap_names[i].name != NULL; i++) @@ -754,10 +820,9 @@ name = ecap_names[i].name; break; } - printf("%s %d", name, PCI_EXTCAP_VER(ecap)); + printf("%s %d\n", name, PCI_EXTCAP_VER(ecap)); break; } - printf("\n"); ptr = PCI_EXTCAP_NEXTPTR(ecap); if (ptr == 0) break; Index: usr.sbin/pciconf/pciconf.h =================================================================== --- usr.sbin/pciconf/pciconf.h +++ usr.sbin/pciconf/pciconf.h @@ -27,7 +27,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $FreeBSD$ + * $FreeBSD: head/usr.sbin/pciconf/pciconf.h 279466 2015-03-01 00:59:35Z rstone $ */ #ifndef __PCICONF_H__ @@ -37,6 +37,7 @@ void list_errors(int fd, struct pci_conf *p); uint8_t pci_find_cap(int fd, struct pci_conf *p, uint8_t id); uint16_t pcie_find_cap(int fd, struct pci_conf *p, uint16_t id); +void print_bar(int fd, struct pci_conf *p, const char *label, uint16_t bar); uint32_t read_config(int fd, struct pcisel *sel, long reg, int width); #endif Index: usr.sbin/pciconf/pciconf.c =================================================================== --- usr.sbin/pciconf/pciconf.c +++ usr.sbin/pciconf/pciconf.c @@ -29,7 +29,7 @@ #ifndef lint static const char rcsid[] = - "$FreeBSD$"; + "$FreeBSD: head/usr.sbin/pciconf/pciconf.c 287522 2015-09-06 20:05:29Z bapt $"; #endif /* not lint */ #include @@ -234,9 +234,9 @@ for (p = conf; p < &conf[pc.num_matches]; p++) { printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x " "chip=0x%08x rev=0x%02x hdr=0x%02x\n", - (p->pd_name && *p->pd_name) ? p->pd_name : + *p->pd_name ? p->pd_name : "none", - (p->pd_name && *p->pd_name) ? (int)p->pd_unit : + *p->pd_name ? 
(int)p->pd_unit : none_count++, p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev, p->pc_sel.pc_func, (p->pc_class << 16) | @@ -263,10 +263,7 @@ static void list_bars(int fd, struct pci_conf *p) { - struct pci_bar_io bar; - uint64_t base; - const char *type; - int i, range, max; + int i, max; switch (p->pc_hdr & PCIM_HDRTYPE) { case PCIM_HDRTYPE_NORMAL: @@ -282,40 +279,50 @@ return; } - for (i = 0; i <= max; i++) { - bar.pbi_sel = p->pc_sel; - bar.pbi_reg = PCIR_BAR(i); - if (ioctl(fd, PCIOCGETBAR, &bar) < 0) - continue; - if (PCI_BAR_IO(bar.pbi_base)) { - type = "I/O Port"; + for (i = 0; i <= max; i++) + print_bar(fd, p, "bar ", PCIR_BAR(i)); +} + +void +print_bar(int fd, struct pci_conf *p, const char *label, uint16_t bar_offset) +{ + uint64_t base; + const char *type; + struct pci_bar_io bar; + int range; + + bar.pbi_sel = p->pc_sel; + bar.pbi_reg = bar_offset; + if (ioctl(fd, PCIOCGETBAR, &bar) < 0) + return; + if (PCI_BAR_IO(bar.pbi_base)) { + type = "I/O Port"; + range = 32; + base = bar.pbi_base & PCIM_BAR_IO_BASE; + } else { + if (bar.pbi_base & PCIM_BAR_MEM_PREFETCH) + type = "Prefetchable Memory"; + else + type = "Memory"; + switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { + case PCIM_BAR_MEM_32: range = 32; - base = bar.pbi_base & PCIM_BAR_IO_BASE; - } else { - if (bar.pbi_base & PCIM_BAR_MEM_PREFETCH) - type = "Prefetchable Memory"; - else - type = "Memory"; - switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { - case PCIM_BAR_MEM_32: - range = 32; - break; - case PCIM_BAR_MEM_1MB: - range = 20; - break; - case PCIM_BAR_MEM_64: - range = 64; - break; - default: - range = -1; - } - base = bar.pbi_base & ~((uint64_t)0xf); + break; + case PCIM_BAR_MEM_1MB: + range = 20; + break; + case PCIM_BAR_MEM_64: + range = 64; + break; + default: + range = -1; } - printf(" bar [%02x] = type %s, range %2d, base %#jx, ", - PCIR_BAR(i), type, range, (uintmax_t)base); - printf("size %ju, %s\n", (uintmax_t)bar.pbi_length, - bar.pbi_enabled ? 
"enabled" : "disabled"); + base = bar.pbi_base & ~((uint64_t)0xf); } + printf(" %s[%02x] = type %s, range %2d, base %#jx, ", + label, bar_offset, type, range, (uintmax_t)base); + printf("size %ju, %s\n", (uintmax_t)bar.pbi_length, + bar.pbi_enabled ? "enabled" : "disabled"); } static void