Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F142491390
D7697.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
48 KB
Referenced Files
None
Subscribers
None
D7697.diff
View Options
Index: sys/dev/pci/pci_pci.c
===================================================================
--- sys/dev/pci/pci_pci.c
+++ sys/dev/pci/pci_pci.c
@@ -43,6 +43,10 @@
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/mutex.h>
+#include <sys/lock.h>
+#include <machine/pci_cfgreg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
@@ -108,6 +112,11 @@
DEVMETHOD_END
};
+static MALLOC_DEFINE(M_AER, "AER", "Advanced Error Report");
+/*
+ * Global AER bookkeeping shared by every bridge instance.
+ *
+ * AER_root    - list of recorded error events, appended at the tail.
+ * AER_counter - number of error interrupts recorded; exported read-only
+ *               as hw.pci.pcib_error_count.
+ * AER_lock    - spin mutex taken around accesses to the two objects above.
+ *
+ * The list head is statically initialised so that it is valid even before
+ * the first AER-capable bridge attaches.
+ */
+static STAILQ_HEAD(, AER_node) AER_root = STAILQ_HEAD_INITIALIZER(AER_root);
+static int AER_counter = 0;
+static struct mtx AER_lock;
+
static devclass_t pcib_devclass;
DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc));
@@ -903,6 +912,333 @@
return(ENXIO);
}
+
+/*
+ * sysctl handler: copy one recorded AER event out to the caller.
+ *
+ * The single trailing MIB component (name[0]) is the zero-based index of
+ * the record to fetch.  The index must be below AER_counter and not older
+ * than the last AER_REC_MAXLEN events.
+ *
+ * Returns EINVAL for a malformed or out-of-window index, ENOENT when the
+ * index is inside the window but no node with that index is on the list,
+ * and otherwise the result of SYSCTL_OUT().
+ *
+ * The record is copied to the stack while AER_lock is held, so the data
+ * handed to SYSCTL_OUT() cannot change or disappear after the lock is
+ * dropped.
+ *
+ * NOTE(review): struct AER_error_data contains a kernel pointer (dev_name)
+ * which is exported verbatim; consider replacing it with an inline string.
+ */
+static int
+sysctl_pcib_error_records(SYSCTL_HANDLER_ARGS)
+{
+	struct AER_error_data snapshot;
+	struct AER_node *node;
+	int *name;
+	int found, report_idx;
+	u_int namelen;
+
+	name = (int *)arg1;
+	namelen = arg2;
+
+	/* Exactly one index component is expected; check before reading it. */
+	if (namelen != 1)
+		return (EINVAL);
+	report_idx = name[0];
+
+	/* No AER-capable bridge has attached yet: nothing can be recorded. */
+	if (!mtx_initialized(&AER_lock))
+		return (EINVAL);
+
+	found = 0;
+	mtx_lock_spin(&AER_lock);
+	/* Input index cannot exceed #error actually recv */
+	if (report_idx < 0 || report_idx >= AER_counter ||
+	    report_idx < (AER_counter - AER_REC_MAXLEN)) {
+		mtx_unlock_spin(&AER_lock);
+		return (EINVAL);
+	}
+
+	/* Walk the list for the requested index and snapshot its payload. */
+	STAILQ_FOREACH(node, &AER_root, next) {
+		if (node->index == report_idx) {
+			snapshot = *node->AER_error_data_container;
+			found = 1;
+			break;
+		}
+	}
+	mtx_unlock_spin(&AER_lock);
+
+	/* The counter can run ahead of the list; never hand NULL to copyout. */
+	if (!found)
+		return (ENOENT);
+
+	return (SYSCTL_OUT(req, &snapshot, sizeof(snapshot)));
+}
+
+
+/*
+ * sysctl handler (currently disabled): an optional feature to clear the
+ * error records through a sysctl call to hw.pci.pcib_clear_error_records
+ * with name[0] set to 1.
+ *
+ */
+/*
+static int
+sysctl_pcib_clear_error_records(SYSCTL_HANDLER_ARGS)
+{
+ int *name;
+ int clear;
+ name = (int *)arg1;
+ clear = name[0];
+
+ if (clear == 1) {
+ mtx_lock_spin(&AER_lock);
+ STAILQ_INIT(&AER_root);
+ AER_counter = 0;
+ mtx_unlock_spin(&AER_lock);
+ }
+ else
+ return (EINVAL);
+
+ return 0;
+}
+
+SYSCTL_NODE(_hw_pci, OID_AUTO, pcib_clear_error_records, CTLFLAG_RD, sysctl_pcib_clear_error_records,
+ "PCI Bridge Clear Error records");
+*/
+
+
+/*
+ * sysctl handler: read a PCI config register, for error injection purposes.
+ *
+ * The five trailing MIB components are, in order: bus, slot, function,
+ * register offset and access width in bytes.  The value returned by
+ * pci_cfgregread() is copied out as an int.
+ *
+ * Returns EINVAL when the component count is not 5, when any component is
+ * outside the PCI addressing limits, or when the width is not 1, 2 or 4 or
+ * the register offset is not aligned to it.
+ *
+ * NOTE(review): the node is registered CTLFLAG_RD without any privilege
+ * check, so any user who can read this sysctl can issue config reads to
+ * arbitrary functions, including non-existent ones - confirm that this is
+ * intended outside of test kernels.
+ */
+static int
+sysctl_pcib_probe(SYSCTL_HANDLER_ARGS)
+{
+	int *name;
+	int bus, slot, func, reg, bytes, result;
+	u_int namelen;
+
+	namelen = arg2;
+	if (namelen != 5)
+		return (EINVAL);
+
+	name = (int *)arg1;
+	bus = name[0];
+	slot = name[1];
+	func = name[2];
+	reg = name[3];
+	bytes = name[4];
+
+	/* The values come straight from userland; reject anything malformed. */
+	if (bus < 0 || bus > PCI_BUSMAX || slot < 0 || slot > PCI_SLOTMAX ||
+	    func < 0 || func > PCI_FUNCMAX || reg < 0 || reg > PCIE_REGMAX)
+		return (EINVAL);
+	if (bytes != 1 && bytes != 2 && bytes != 4)
+		return (EINVAL);
+	if ((reg & (bytes - 1)) != 0)
+		return (EINVAL);
+
+	result = pci_cfgregread(bus, slot, func, reg, bytes);
+
+	return (SYSCTL_OUT(req, &result, sizeof(int)));
+}
+
+
+/*
+ * Setup sysctl on hw.pci
+ *
+ * pcib_error_count   - read-only integer backed directly by AER_counter.
+ * pcib_error_records - node served by sysctl_pcib_error_records(); the
+ *                      trailing MIB component selects the record index.
+ * pcib_probe         - node served by sysctl_pcib_probe(); the trailing
+ *                      MIB components are bus, slot, func, reg and width.
+ */
+SYSCTL_INT(_hw_pci, OID_AUTO, pcib_error_count, CTLFLAG_RD, &AER_counter, 0,
+ "PCI Bridge Intr count");
+SYSCTL_NODE(_hw_pci, OID_AUTO, pcib_error_records, CTLFLAG_RD, sysctl_pcib_error_records,
+ "PCI Bridge Error records");
+SYSCTL_NODE(_hw_pci, OID_AUTO, pcib_probe, CTLFLAG_RD, sysctl_pcib_probe,
+ "PCI Bridge Probing for Error Injection");
+
+/*
+ * Interrupt handler
+ *
+ * Snapshots the bridge's PCI-E device status and AER registers, clears the
+ * status bits that were read, and appends the snapshot to the global
+ * record list.  arg is the bridge's struct pcib_softc.
+ *
+ * - The registers are read and cleared first, into a stack copy, so the
+ *   hardware is acknowledged even when no memory is available.
+ * - Allocations use M_NOWAIT, so the handler never sleeps in malloc.
+ * - AER_counter and the list are updated in one critical section, so the
+ *   counter never runs ahead of the list.  The counter is only bumped
+ *   when a record is actually stored.
+ * - Once AER_REC_MAXLEN records exist, the oldest node is unlinked and its
+ *   storage reused for the new event instead of being leaked.  Nothing
+ *   that was ever visible on the list is freed here.
+ * - Read values are compared against all-ones (the value a failed config
+ *   read returns) rather than tested for sign, so a status word with
+ *   bit 31 set is still cleared.
+ */
+static void
+pcib_intr_handler(void *arg)
+{
+	struct AER_error_data snapshot;
+	struct AER_node *current, *spare;
+	struct pcib_softc *sc;
+	device_t dev;
+	uint32_t temp;
+	int offsAER;
+	int offsPCIE;
+	int recycled;
+
+	sc = arg;
+	dev = sc->dev;
+	device_printf(dev, ": New Interrupt Received\n");
+
+	bzero(&snapshot, sizeof(snapshot));
+
+	/* Record source tag */
+	snapshot.dev_name = device_get_name(dev);
+	snapshot.unit = device_get_unit(dev);
+
+	/* PCI-E error status */
+	if (pci_find_cap(dev, PCIY_EXPRESS, &offsPCIE) == 0) {
+		temp = pci_read_config(dev, offsPCIE + PCIER_DEVICE_STA, 2);
+		snapshot.pcie_cap_status = temp;
+		if (temp != 0xffff)
+			pci_write_config(dev, offsPCIE + PCIER_DEVICE_STA,
+			    BCLR_PCIE_STATUS, 1);
+	}
+
+	if (pci_find_extcap(dev, PCIZ_AER, &offsAER) != 0) {
+		device_printf(dev, ": AER not supported\n");
+	} else {
+		/* Uncorrectable error reg */
+		temp = pci_read_config(dev, offsAER + PCIR_AER_UC_STATUS, 4);
+		snapshot.unc_err_status = temp;
+		if (temp != 0xffffffff)
+			pci_write_config(dev, offsAER + PCIR_AER_UC_STATUS,
+			    BCLR_AER_UNC_STATUS, 4);
+
+		/* Uncorrectable error severity */
+		snapshot.unc_err_seve = pci_read_config(dev,
+		    offsAER + PCIR_AER_UC_SEVERITY, 4);
+
+		/* Correctable error reg */
+		temp = pci_read_config(dev, offsAER + PCIR_AER_COR_STATUS, 4);
+		snapshot.c_err_status = temp;
+		if (temp != 0xffffffff)
+			pci_write_config(dev, offsAER + PCIR_AER_COR_STATUS,
+			    BCLR_AER_C_STATUS, 4);
+
+		/* ECRC */
+		snapshot.AER_ECRC = pci_read_config(dev,
+		    offsAER + PCIR_AER_CAP_CONTROL, 4);
+
+		/* Root Port error status */
+		temp = pci_read_config(dev, offsAER + PCIR_AER_ROOTERR_STATUS, 4);
+		snapshot.root_err_status = temp;
+		if (temp != 0xffffffff)
+			pci_write_config(dev, offsAER + PCIR_AER_ROOTERR_STATUS,
+			    BCLR_AER_RTSTATUS, 4);
+
+		/* ERROR Source */
+		snapshot.err_src_ID = pci_read_config(dev,
+		    offsAER + PCIR_AER_ERR_SOURCE_ID, 2);
+		snapshot.c_err_src_ID = pci_read_config(dev,
+		    offsAER + PCIR_AER_COR_SOURCE_ID, 2);
+	}
+
+	/*
+	 * Pre-allocate a node outside the spin lock.  It is only needed
+	 * while the list is still growing; a failure is tolerated because
+	 * a full list supplies recycled storage.
+	 */
+	spare = malloc(sizeof(*spare), M_AER, M_NOWAIT | M_ZERO);
+	if (spare != NULL) {
+		spare->AER_error_data_container = malloc(
+		    sizeof(struct AER_error_data), M_AER, M_NOWAIT | M_ZERO);
+		if (spare->AER_error_data_container == NULL) {
+			free(spare, M_AER);
+			spare = NULL;
+		}
+	}
+
+	/* Save Node */
+	recycled = 0;
+	mtx_lock_spin(&AER_lock);
+	if (AER_counter >= AER_REC_MAXLEN && !STAILQ_EMPTY(&AER_root)) {
+		/* List full: take the oldest node and reuse its storage. */
+		current = STAILQ_FIRST(&AER_root);
+		STAILQ_REMOVE_HEAD(&AER_root, next);
+		recycled = 1;
+	} else {
+		current = spare;
+		spare = NULL;
+	}
+	if (current != NULL) {
+		AER_counter += 1;
+		snapshot.error_number = AER_counter;
+		*current->AER_error_data_container = snapshot;
+		current->index = AER_counter - 1;
+		STAILQ_INSERT_TAIL(&AER_root, current, next);
+	}
+	mtx_unlock_spin(&AER_lock);
+
+	if (recycled)
+		printf("PCI-E Bridge AER Record list full, remove oldest record.\n");
+	else if (current == NULL)
+		device_printf(dev, ": AER record dropped, out of memory\n");
+
+	/* The spare was never published, so it is safe to free it here. */
+	if (spare != NULL) {
+		free(spare->AER_error_data_container, M_AER);
+		free(spare, M_AER);
+	}
+}
+
+
+/*
+ * Setup Interrupt & Reg interrupt handler
+ *
+ * Enables error reporting on an AER-capable bridge, allocates one
+ * interrupt (MSI-X, then MSI, then legacy INTx as a fallback) and installs
+ * pcib_intr_handler() on it.  Does nothing beyond a console message when
+ * the bridge has no AER capability.  On any allocation or registration
+ * failure the interrupt resources are released and sc->sc_irq_res is left
+ * NULL.
+ *
+ * The shared record list and its lock are initialised exactly once, and
+ * before the handler is installed, so that a later bridge attaching does
+ * not wipe records or re-initialise a live mutex, and the handler never
+ * runs against an uninitialised lock.
+ *
+ * NOTE(review): the Root Error Command register is written for every
+ * AER-capable bridge; presumably it only exists on root ports - confirm
+ * whether the PCI-E port type should be checked first.
+ */
+static void
+pcib_setup_intr_common(struct pcib_softc *sc)
+{
+	device_t dev;
+	int dev_ctl;
+	int error;
+	int intr_count;
+	int irq_flags;
+	int offsAER;
+	int offsPCIE;
+
+	dev = sc->dev;
+
+	/* Enable device error report */
+	if (pci_find_extcap(dev, PCIZ_AER, &offsAER) != 0 ||
+	    pci_find_cap(dev, PCIY_EXPRESS, &offsPCIE) != 0) {
+		device_printf(dev, "Interrupt not registered: AER not supported\n");
+		return;
+	}
+	/* 7 = correctable | non-fatal | fatal error reporting enable bits. */
+	pci_write_config(dev, offsAER + PCIR_AER_ROOTERR_CMD, 7, 4);
+	dev_ctl = pci_read_config(dev, offsPCIE + PCIER_DEVICE_CTL, 1);
+	pci_write_config(dev, offsPCIE + PCIER_DEVICE_CTL, (dev_ctl | PCIEM_CTL_COR_ENABLE |
+	    PCIEM_CTL_NFER_ENABLE | PCIEM_CTL_FER_ENABLE | PCIEM_CTL_URR_ENABLE), 1);
+	device_printf(dev,"Error Report Enabled\n");
+
+	/* Setup Interrupt & resource allocation */
+	sc->sc_irq_rid = -1;
+
+	intr_count = 1;
+	if (pci_msix_count(dev) == 1) {
+		error = pci_alloc_msix(dev, &intr_count);
+		if (!error)
+			sc->sc_irq_rid = 1;
+	} else if (pci_msi_count(dev) > 0) {
+		error = pci_alloc_msi(dev, &intr_count);
+		if (!error)
+			sc->sc_irq_rid = 1;
+	}
+	if (sc->sc_irq_rid < 0)
+		sc->sc_irq_rid = 0;
+
+	/* A legacy INTx line (rid 0) may be shared with other devices. */
+	irq_flags = RF_ACTIVE;
+	if (sc->sc_irq_rid == 0)
+		irq_flags |= RF_SHAREABLE;
+
+	sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+	    &sc->sc_irq_rid, irq_flags);
+
+	if (!sc->sc_irq_res) {
+		device_printf(dev, "Unable to allocate interrupt resource\n");
+		if (sc->sc_irq_rid == 1)
+			pci_release_msi(dev);
+		return;
+	}
+
+	/* One-time init of the shared record list, before any handler runs. */
+	if (!mtx_initialized(&AER_lock)) {
+		STAILQ_INIT(&AER_root);
+		mtx_init(&AER_lock, "AER Record", NULL, MTX_SPIN);
+	}
+
+	/* Reg intr handler */
+	error = bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC,
+	    NULL, pcib_intr_handler, sc, &sc->sc_irq_cookie);
+	if (error) {
+		device_printf(dev, "Unable to register interrupt handler\n");
+		bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, sc->sc_irq_res);
+		sc->sc_irq_res = NULL;
+		if (sc->sc_irq_rid == 1)
+			pci_release_msi(dev);
+		return;
+	}
+
+	device_printf(dev, "Interrupt registered\n");
+}
+
+
+/*
+ * Remove the interrupt, release the interrupt handler, and
+ * add the interrupt release to bus_generic_detach.
+ *
+ * Left commented out because no way to test it has been found yet.
+ */
+/*
+static int
+pcib_release_intr_common(struct pcib_softc *sc)
+{
+ device_t dev;
+ int error;
+
+ dev = sc->dev;
+ error = bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_cookie);
+ if (error) {
+ device_printf(dev, "Unable to teardown interrupt\n");
+ return error;
+ }
+
+ error = bus_free_resource(dev, SYS_RES_IRQ, sc->sc_irq_res);
+ if (error) {
+ device_printf(dev, "Unable to free interrupt resouce\n");
+ return error;
+ }
+
+ error = pci_release_msi(dev);
+ if (error)
+ device_printf(dev, "Unable to release interrupt\n");
+
+ return error;
+}
+
+int
+pcib_detach(device_t dev)
+{
+ int error;
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+ error = bus_generic_detach(dev);
+ if (error)
+ return (error);
+
+ if (sc->sc_irq_res)
+ error = pcib_release_intr_common(sc);
+
+ return error;
+}
+*/
+
+
void
pcib_attach_common(device_t dev)
{
@@ -914,6 +1250,8 @@
sc = device_get_softc(dev);
sc->dev = dev;
+ pcib_setup_intr_common(sc);
+
/*
* Get current bridge configuration.
*/
@@ -942,6 +1280,7 @@
SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus",
CTLFLAG_RD, &sc->bus.sub, 0, "Subordinate bus number");
+
/*
* Quirk handling.
*/
Index: sys/dev/pci/pcib_private.h
===================================================================
--- sys/dev/pci/pcib_private.h
+++ sys/dev/pci/pcib_private.h
@@ -69,6 +69,36 @@
#define WIN_MEM 0x2
#define WIN_PMEM 0x4
+/*
+ * PCI-Bridge AER support
+ *
+ * The BCLR_* values are the masks the interrupt handler writes back to the
+ * corresponding status register after recording it.
+ */
+/* AER Bits clear settings based on PCIE base spec. 1.1 */
+#define BCLR_PCIE_STATUS 0xf /* written to PCIER_DEVICE_STA */
+#define BCLR_AER_UNC_STATUS 0x1ff030 /* written to PCIR_AER_UC_STATUS */
+#define BCLR_AER_C_STATUS 0x31c1 /* written to PCIR_AER_COR_STATUS */
+#define BCLR_AER_RTSTATUS 127 /* written to PCIR_AER_ROOTERR_STATUS */
+#define AER_REC_MAXLEN 255 /* max number of error records retained */
+
+struct AER_error_data { /* one recorded error event; copied out verbatim by the pcib_error_records sysctl */
+ int error_number; /* Nth of error starts from 1 */
+ const char *dev_name; /* device_get_name() of the bridge; NOTE(review): kernel pointer, meaningless in userland */
+ int unit; /* device pcib #unit number of source */
+ int pcie_cap_status; /* Status register from PCI-E extended cap. */
+ int unc_err_status; /* uncorrectable error status */
+ int unc_err_seve; /* uncorrectable error severity */
+ int c_err_status; /* correctable error status */
+ int AER_ECRC; /* ECRC check status (raw PCIR_AER_CAP_CONTROL read) */
+ int root_err_status; /* Root complex error status */
+ int err_src_ID; /* uncorrectable error source ID */
+ int c_err_src_ID; /* correctable error source ID */
+};
+
+struct AER_node { /* list element wrapping one struct AER_error_data */
+ int index; /* zero-based record index (error_number - 1); key used by the sysctl lookup */
+ struct AER_error_data *AER_error_data_container; /* separately allocated payload */
+ STAILQ_ENTRY(AER_node) next; /* linkage on the AER_root list */
+};
+
struct pcib_window {
pci_addr_t base; /* base address */
pci_addr_t limit; /* topmost address */
@@ -125,6 +155,10 @@
uint16_t secstat; /* secondary bus status register */
uint16_t bridgectl; /* bridge control register */
uint8_t seclat; /* secondary bus latency timer */
+
+ int sc_irq_rid; /* Interrupt handler*/
+ struct resource *sc_irq_res;
+ void *sc_irq_cookie;
};
#define PCIB_SUPPORTED_ARI_VER 1
Index: tools/tools/pci/InjectAER.c
===================================================================
--- /dev/null
+++ tools/tools/pci/InjectAER.c
@@ -0,0 +1,898 @@
+/*-
+ * Copyright (c) 2016 Isilon LLC, EMC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <dev/pci/pcireg.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/pciio.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h> /* O_RDWR */
+#include <sys/queue.h> /* STAILQ */
+#include <stdlib.h> /* malloc */
+#include <errno.h> /* strerror */
+#include <string.h> /* strcpy */
+#include <net/if.h> /* ifreq */
+#include <unistd.h> /* close */
+
+#define DEVICE_LIST_LENGTH 255 /* not referenced in the visible code; presumably the scan buffer size - TODO confirm */
+#define INJ_SYSCTL_NAMELEN 8 /* MIB length passed to sysctl(): slots 0-2 from the path, 3-7 = bus, dev, func, reg, width */
+#define INJ_SYSCTL_PATH "hw.pci.pcib_probe" /* kernel probe sysctl used to issue the config read */
+#define PATH_DEVPCI "/dev/pci" /* device node used for PCIOCREAD/PCIOCWRITE */
+#define PCIEM_CAPID 0xFF /* mask selecting the capability ID byte of a capability header */
+#define PCIR_EXTCAP_MAX 0xFFF /* upper bound used when walking extended capabilities */
+
+#define METHOD_RBER_AER 1 /* listed as "Probing: COR/Fatal" */
+#define METHOD_AER 2 /* listed as "Probing: non-Fatal/Fatal" */
+#define METHOD_NO_RBER 3 /* listed as "Probing: non-fatal" */
+#define METHOD_NICFLAG 4 /* listed as "NIC flag: non-fatal"; handled by inject_if() */
+
+#define WIDTH_32B 4 /* pci_io.pi_width values, in bytes */
+#define WIDTH_16B 2
+#define WIDTH_8B 1
+
+void usage(); /* defined outside the visible part of the file; presumably prints help - TODO confirm */
+void inject_dev(int fatal); /* inject by probing a non-existent function; fatal selects UR severity */
+void inject_if(); /* inject by toggling a NIC's IFF_UP flag with bridge bus mastering off */
+void scan(); /* defined outside the visible part of the file; expected to fill dev_loca_head */
+void error_handler(int stage); /* report a failure at the given stage and terminate; never returns */
+void record_initial(); /* save register state into initial_config */
+void restore_initial(); /* write initial_config back to the hardware */
+int findPCIEOffs(struct pcisel *pi_sel); /* offset of the PCI-Express capability, 0 if none */
+int findAEROffs(struct pcisel *pi_sel); /* offset of the AER extended capability, 0 if none */
+
+
+struct config { /* register/flag state saved before injection so it can be restored */
+ int bridgeControl; /* bridge control register, low 8 bits */
+ int deviceControl; /* PCI-E device control register, low 8 bits */
+ int deviceUncErrMask; /* AER uncorrectable error mask (32 bits) */
+ int deviceUncErrSeve; /* AER uncorrectable error severity (32 bits) */
+ int deviceCErrMask; /* AER correctable error mask (32 bits) */
+ int previousUP; /* 1 if the NIC had IFF_UP set before inject_if() */
+ int previousBUSMAS; /* bridge command register byte read before bus mastering was disabled */
+};
+
+struct dev_loca_entry { /* one bridge/device pair on which an injection method is available */
+ struct pcisel bridge_pi_sel; /* PCI selector of the bridge */
+ char bridge_name[PCI_MAXNAMELEN + 1]; /* driver name of the bridge */
+ int bridge_unit; /* driver unit number of the bridge */
+ struct pcisel device_pi_sel; /* PCI selector of the device */
+ char device_name[PCI_MAXNAMELEN + 1]; /* driver name of the device */
+ int device_unit; /* driver unit number of the device */
+ int method; /* one of the METHOD_* constants */
+ STAILQ_ENTRY(dev_loca_entry) next; /* linkage on dev_loca_head */
+};
+
+static STAILQ_HEAD(,dev_loca_entry) dev_loca_head; /* candidates found by scan() */
+int fd; /* descriptor for PATH_DEVPCI, opened in main() */
+
+/* Global VARs will be defined after selecting device */
+struct pcisel bridge_sel; /* selected bridge */
+struct pcisel device_sel; /* selected device */
+struct config initial_config; /* state saved by record_initial() / inject_if() */
+char device_name[IFNAMSIZ]; /* "<driver><unit>" of the selected device; used as interface name */
+int PCIE_offs; /* result of findPCIEOffs() for the selected device */
+int AER_offs; /* result of findAEROffs() for the selected device; 0 means no AER */
+
+
+
+/*
+ * Read one decimal integer from stdin into *value.
+ *
+ * Returns 1 when a number was parsed.  On unparsable input the rest of the
+ * line is discarded and 0 is returned so that the caller can prompt again
+ * instead of spinning on the same bad character.  On end of input the
+ * program quits through error_handler(0); nothing has been modified at any
+ * point where this helper is used.
+ */
+static int
+read_int(int *value)
+{
+	int c, n;
+
+	n = scanf("%d", value);
+	if (n == 1)
+		return (1);
+	if (n == EOF)
+		error_handler(0);
+	while ((c = getchar()) != '\n' && c != EOF)
+		;
+	return (0);
+}
+
+/*
+ * Read user input, process automatic error-injection,
+ * or guide user through running error-injection.
+ *
+ * Exactly one option is expected: -a (inject on the first scanned
+ * candidate), -l (list candidates and prompt for one) or -h (usage).
+ * Anything else prints the usage text.  Returns 0; failures terminate the
+ * program inside error_handler().
+ */
+int main(int argc, char **argv)
+{
+	struct dev_loca_entry *entry;
+	int automode;
+	int count, select_index, select_fatal, method;
+	int parm;	/* int, not char: getopt() returns -1 at the end */
+
+	if (argc != 2) {
+		usage();
+		return 0;
+	}
+	automode = -1;
+	while ((parm = getopt(argc, argv, "alh")) != -1) {
+		switch(parm) {
+		case 'a':
+			automode = 1;
+			break;
+
+		case 'l':
+			automode = 0;
+			break;
+
+		case 'h':
+		default:
+			usage();
+			return 0;
+		}
+	}
+	/* A bare non-option argument selects no mode at all. */
+	if (automode < 0) {
+		usage();
+		return 0;
+	}
+
+	bzero(&bridge_sel, sizeof(bridge_sel));
+	bzero(&device_sel, sizeof(device_sel));
+	fd = open(PATH_DEVPCI, O_RDWR);
+	if (fd < 0) {
+		printf("Unable to open %s.\n", PATH_DEVPCI);
+		error_handler (1);
+	}
+
+	STAILQ_INIT(&dev_loca_head);
+
+	scan();
+
+	printf("Scan result:\n");
+	if (!STAILQ_FIRST(&dev_loca_head)) {
+		printf("No potentially error-inject-avilabile device found\n");
+		error_handler(0);
+	}
+
+	if (automode) {
+		entry = STAILQ_FIRST(&dev_loca_head);
+		printf("Performing auto error-injetion on: \n");
+		printf("Bridge=%s%d@%d:%d:%d:%d ",entry->bridge_name, entry->bridge_unit,
+		    entry->bridge_pi_sel.pc_domain,entry->bridge_pi_sel.pc_bus,
+		    entry->bridge_pi_sel.pc_dev,entry->bridge_pi_sel.pc_func);
+		printf("Device=%s%d@%d:%d:%d:%d ",entry->device_name, entry->device_unit,
+		    entry->device_pi_sel.pc_domain,entry->device_pi_sel.pc_bus,
+		    entry->device_pi_sel.pc_dev,entry->device_pi_sel.pc_func);
+		printf("Method=%d\n", entry->method);
+		bridge_sel = entry->bridge_pi_sel;
+		device_sel = entry->device_pi_sel;
+		/* Bounded: driver name plus unit may not fit in IFNAMSIZ. */
+		snprintf(device_name, sizeof(device_name), "%s%d",
+		    entry->device_name, entry->device_unit);
+		select_fatal = 0;
+		PCIE_offs = findPCIEOffs(&device_sel);
+		AER_offs = findAEROffs(&device_sel);
+		if (entry->method == METHOD_NICFLAG)
+			inject_if();
+		else
+			inject_dev(select_fatal);
+		close(fd);
+		printf("Error injection complete.\n");
+		return 0;
+	}
+
+	count = 0;
+	STAILQ_FOREACH(entry, &dev_loca_head, next) {
+		printf("Index: %d ", count);
+		printf("Bridge: %s%d@%d:%d:%d:%d ",entry->bridge_name, entry->bridge_unit,
+		    entry->bridge_pi_sel.pc_domain,entry->bridge_pi_sel.pc_bus,
+		    entry->bridge_pi_sel.pc_dev,entry->bridge_pi_sel.pc_func);
+		printf("Device: %s%d@%d:%d:%d:%d ",entry->device_name, entry->device_unit,
+		    entry->device_pi_sel.pc_domain,entry->device_pi_sel.pc_bus,
+		    entry->device_pi_sel.pc_dev,entry->device_pi_sel.pc_func);
+		printf("Method=");
+		switch (entry->method) {
+		case (METHOD_RBER_AER):
+			printf ("Probing: COR/Fatal");
+			break;
+		case (METHOD_AER):
+			printf ("Probing: non-Fatal/Fatal");
+			break;
+		case (METHOD_NO_RBER):
+			printf ("Probing: non-fatal");
+			break;
+		case (METHOD_NICFLAG):
+			printf ("NIC flag: non-fatal");
+			break;
+		default:
+			error_handler(2);
+		}
+		printf("\n");
+		count ++;
+	}
+
+	printf("Select a combination to try error injection.\n");
+	select_index = 0;
+	while(1) {
+		printf("Input index (0~%d): ", count - 1);
+		if (!read_int(&select_index))
+			continue;
+		if (select_index < count && select_index >= 0)
+			break;
+	}
+
+	printf("Get: %d\n", select_index);
+
+	count = 0;
+	method = 0;
+	STAILQ_FOREACH(entry, &dev_loca_head, next) {
+		if (count == select_index) {
+			bridge_sel = entry->bridge_pi_sel;
+			device_sel = entry->device_pi_sel;
+			snprintf(device_name, sizeof(device_name), "%s%d",
+			    entry->device_name, entry->device_unit);
+			method = entry->method;
+			break;
+		}
+		count ++;
+	}
+
+	printf("Selected: ");
+	printf("Index=%d ", count);
+	printf("DeviceName=%s ", device_name);
+	printf("Method=%d\n", method);
+
+	select_fatal = 0;
+	printf ("\n");
+	printf ("Expected: ");
+	switch (method) {
+	case METHOD_RBER_AER:
+		while (1) {
+			printf ("Select error type ");
+			printf ("(0=correctable, 1=fatal): ");
+			if (!read_int(&select_fatal))
+				continue;
+			if (select_fatal == 0 || select_fatal == 1)
+				break;
+		}
+		if (select_fatal)
+			printf ("Uncorrectable fatal error ");
+		else
+			printf ("Correctable error ");
+		printf ("record on root port status ");
+		break;
+
+	case METHOD_AER:
+		while (1) {
+			printf ("Select error type ");
+			printf ("(0=non-fatal, 1=fatal): ");
+			if (!read_int(&select_fatal))
+				continue;
+			if (select_fatal == 0 || select_fatal == 1)
+				break;
+		}
+		if (select_fatal)
+			printf ("Uncorrectable fatal error ");
+		else
+			printf ("Uncorrectable non-fatal error ");
+		printf ("record on root port status ");
+		break;
+
+	case METHOD_NO_RBER:
+		printf ("Uncorrectable non-fatal error ");
+		printf ("record on root port status ");
+		break;
+
+	case METHOD_NICFLAG:
+		printf ("Uncorrectable non-fatal error ");
+		printf ("record on root port status, and\n");
+		printf ("Error 'Unsupported Request' record on ");
+		printf ("bridge AER Uncorrectable error status\n");
+		break;
+
+	default:
+		error_handler (2);
+		break;
+	}
+	printf ("after injection.\n");
+	printf ("\n");
+
+
+	PCIE_offs = findPCIEOffs(&device_sel);
+	AER_offs = findAEROffs(&device_sel);
+	printf("Device: PCI-E Extd Config Space @ %X \n", PCIE_offs);
+	if (AER_offs)
+		printf("Device: AER Cap Config Space @ %X \n", AER_offs);
+	else
+		printf("Device: AER Not supported\n");
+
+	if (method == METHOD_NICFLAG)
+		inject_if();
+	else
+		inject_dev(select_fatal);
+
+	close(fd);
+
+	printf("Error injection complete.\n");
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Write one config register through /dev/pci.
+ * Fills in reg, width and data on *io, issues PCIOCWRITE and, on failure,
+ * hands over to error_handler(stage), which does not return.
+ */
+static void
+inj_cfg_write(struct pci_io *io, int reg, int width, u_int32_t data, int stage)
+{
+	io->pi_reg = reg;
+	io->pi_width = width;
+	io->pi_data = data;
+	if (ioctl(fd, PCIOCWRITE, io))
+		error_handler(stage);
+}
+
+/*
+ * Attempt to probe non-existent function to trigger UR error.
+ * error message is generated by device and forwarded by bridge.
+ *
+ * fatal: non-zero marks Unsupported Request as fatal in the device's AER
+ * severity register, zero marks it non-fatal (only when AER is present).
+ *
+ * Sequence: save state, clear stale status, enable forwarding and
+ * reporting, unmask the relevant AER bits, issue the probe through the
+ * hw.pci.pcib_probe sysctl, print the bridge's secondary status, clear
+ * status again and restore the saved state.  Every failure is routed to
+ * error_handler() with a stage number in the 3x range.
+ */
+void inject_dev(int fatal)
+{
+	int name[INJ_SYSCTL_NAMELEN];
+	int buffer;
+	size_t len, size;
+	struct pci_io bridge_io;
+	struct pci_io device_io;
+
+	bzero(&bridge_io, sizeof(struct pci_io));
+	bzero(&device_io, sizeof(struct pci_io));
+	bridge_io.pi_sel = bridge_sel;
+	device_io.pi_sel = device_sel;
+
+	/* Record initial value */
+	record_initial();
+
+	/* Clear MasterAbort & Error Received */
+	inj_cfg_write(&bridge_io, PCIR_SECSTAT_1, WIDTH_16B,
+	    PCIM_STATUS_RMABORT | PCIM_STATUS_SERR, 31);
+
+	/* Enable error forwarding */
+	inj_cfg_write(&bridge_io, PCIR_BRIDGECTL_1, WIDTH_8B,
+	    initial_config.bridgeControl | PCIB_BCR_SERR_ENABLE, 31);
+
+	/* Enable error report */
+	inj_cfg_write(&device_io, PCIE_offs + PCIER_DEVICE_CTL, WIDTH_8B,
+	    initial_config.deviceControl |
+	    PCIEM_CTL_URR_ENABLE |
+	    PCIEM_CTL_FER_ENABLE |
+	    PCIEM_CTL_NFER_ENABLE |
+	    PCIEM_CTL_COR_ENABLE, 32);
+
+	/* AER registers */
+	if (AER_offs) {
+		/* Clear UR status */
+		inj_cfg_write(&device_io, AER_offs + PCIR_AER_UC_STATUS,
+		    WIDTH_32B, PCIM_AER_UC_UNSUPPORTED_REQUEST, 33);
+
+		/* Clear AdvsErr status */
+		inj_cfg_write(&device_io, AER_offs + PCIR_AER_COR_STATUS,
+		    WIDTH_32B, PCIM_AER_COR_ADVISORY_NF_ERROR, 33);
+
+		/*
+		 * Clear Uncorrectable error mask for UR.  This must start
+		 * from the saved *uncorrectable* mask; the correctable mask
+		 * belongs to a different register.
+		 */
+		inj_cfg_write(&device_io, AER_offs + PCIR_AER_UC_MASK,
+		    WIDTH_32B, initial_config.deviceUncErrMask &
+		    (~PCIM_AER_UC_UNSUPPORTED_REQUEST), 33);
+
+		/* Clear Correctable error mask for advs. unc. error */
+		inj_cfg_write(&device_io, AER_offs + PCIR_AER_COR_MASK,
+		    WIDTH_32B, initial_config.deviceCErrMask &
+		    (~PCIM_AER_COR_ADVISORY_NF_ERROR), 33);
+
+		/* Fatal or Non-fatal error */
+		if (fatal)
+			inj_cfg_write(&device_io,
+			    AER_offs + PCIR_AER_UC_SEVERITY, WIDTH_32B,
+			    initial_config.deviceUncErrSeve |
+			    PCIM_AER_UC_UNSUPPORTED_REQUEST, 33);
+		else
+			inj_cfg_write(&device_io,
+			    AER_offs + PCIR_AER_UC_SEVERITY, WIDTH_32B,
+			    initial_config.deviceUncErrSeve &
+			    (~PCIM_AER_UC_UNSUPPORTED_REQUEST), 33);
+		printf("Uncorrectable Error Severity set to: %X\n", device_io.pi_data);
+	}
+
+	/*
+	 * Probe non-existent function.  The path resolves to a prefix of
+	 * the MIB; the five probe arguments follow it and together they
+	 * must fill exactly INJ_SYSCTL_NAMELEN slots.
+	 */
+	len = INJ_SYSCTL_NAMELEN;
+	if (sysctlnametomib(INJ_SYSCTL_PATH, name, &len))
+		error_handler(34);
+	if (len + 5 != INJ_SYSCTL_NAMELEN) {
+		errno = EINVAL;
+		error_handler(34);
+	}
+	name[len] = device_io.pi_sel.pc_bus;
+	name[len + 1] = device_io.pi_sel.pc_dev;
+	name[len + 2] = PCI_FUNCMAX;
+	name[len + 3] = 0;
+	name[len + 4] = WIDTH_32B;
+
+	size = sizeof(int);
+	if (sysctl(name, INJ_SYSCTL_NAMELEN, &buffer, &size, NULL, 0))
+		error_handler(34);
+
+	/* Verify MasterAbort & Error Received */
+	bridge_io.pi_width = WIDTH_16B;
+	bridge_io.pi_reg = PCIR_SECSTAT_1;
+	if (ioctl(fd, PCIOCREAD, &bridge_io))
+		error_handler(35);
+	printf("Bridge sec. status after injection: %X\n", bridge_io.pi_data);
+
+	if (AER_offs) {
+		/* Clear UR status */
+		inj_cfg_write(&device_io, AER_offs + PCIR_AER_UC_STATUS,
+		    WIDTH_32B, PCIM_AER_UC_UNSUPPORTED_REQUEST, 35);
+
+		/* Clear AdvsErr status */
+		inj_cfg_write(&device_io, AER_offs + PCIR_AER_COR_STATUS,
+		    WIDTH_32B, PCIM_AER_COR_ADVISORY_NF_ERROR, 35);
+	}
+
+	/* Clear MasterAbort & Error Received */
+	inj_cfg_write(&bridge_io, PCIR_SECSTAT_1, WIDTH_16B,
+	    PCIM_STATUS_RMABORT | PCIM_STATUS_SERR, 35);
+
+	/* Restore */
+	restore_initial();
+}
+
+
+/*
+ * Disabling busmaster and set NIC flag to trigger UR errors.
+ * Error is generated by bridge.
+ *
+ * Operates on the interface named by the global device_name and on the
+ * bridge selected in bridge_sel.  Failures end in error_handler() with a
+ * stage number in the 5x range.
+ *
+ * Ordering and cleanup:
+ * - The bridge command register is read and checked before the interface
+ *   is taken down, so the "bus master already disabled" quit path leaves
+ *   the interface untouched.
+ * - Once bus mastering has been switched off, it is switched back on and
+ *   the interface flag is put back even when an intermediate step fails;
+ *   the first failure is reported afterwards.
+ * - The control socket is closed on the way out.
+ */
+void inject_if()
+{
+	struct ifreq ifr;
+	struct pci_io bridge_io;
+	int failed_stage, saved_errno;
+	int input;	/* int so that EOF from getchar() is distinguishable */
+	int sockfd;
+
+	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sockfd < 0)
+		error_handler(51);
+
+	bzero(&initial_config, sizeof(initial_config));
+	bzero(&bridge_io, sizeof(bridge_io));
+	bridge_io.pi_sel = bridge_sel;
+	bzero(&ifr, sizeof(ifr));
+	strlcpy(ifr.ifr_name, device_name, sizeof(ifr.ifr_name));
+	if (ioctl(sockfd, SIOCGIFFLAGS, &ifr))
+		error_handler (52);
+	printf("Device %s current lower-flags: %X\n", device_name, ifr.ifr_flags);
+	if ((ifr.ifr_flags & IFF_UP) == IFF_UP) {
+		initial_config.previousUP = 1;
+		printf("This device is currently in up state, ");
+		printf("continue may bring link down.\n");
+		while (1) {
+			printf("Continue (y/n) ? ");
+			input = getchar();
+			if (input == 'y')
+				break;
+			else if (input == 'n' || input == EOF)
+				error_handler (0);
+		}
+	}
+
+	/* Check the bridge's BusMaster state before touching anything */
+	bridge_io.pi_width = WIDTH_8B;
+	bridge_io.pi_reg = PCIR_COMMAND;
+	if (ioctl(fd, PCIOCREAD, &bridge_io))
+		error_handler(53);
+	initial_config.previousBUSMAS = bridge_io.pi_data;
+	if (!(initial_config.previousBUSMAS & PCIM_CMD_BUSMASTEREN)) {
+		printf ("BusMaster is previously disabled:");
+		printf ("Unexpected settings. Quit\n");
+		error_handler(0);
+	}
+
+	/* Disable NIC first */
+	ifr.ifr_flags = ifr.ifr_flags & (~IFF_UP);
+	if (ioctl(sockfd, SIOCSIFFLAGS, &ifr))
+		error_handler (53);
+
+	failed_stage = 0;
+	saved_errno = 0;
+
+	/* Disable Bridge's BusMaster */
+	bridge_io.pi_data = initial_config.previousBUSMAS &
+	    (~PCIM_CMD_BUSMASTEREN);
+	if (ioctl(fd, PCIOCWRITE, &bridge_io)) {
+		failed_stage = 53;
+		saved_errno = errno;
+	} else {
+		/* Toggle NIC flag */
+		ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+		if (ioctl(sockfd, SIOCSIFFLAGS, &ifr)) {
+			failed_stage = 52;
+			saved_errno = errno;
+		} else {
+			ifr.ifr_flags = ifr.ifr_flags & (~IFF_UP);
+			if (ioctl(sockfd, SIOCSIFFLAGS, &ifr)) {
+				failed_stage = 52;
+				saved_errno = errno;
+			}
+		}
+
+		/* Bring back BusMaster, even if the toggle failed */
+		bridge_io.pi_data = initial_config.previousBUSMAS;
+		if (ioctl(fd, PCIOCWRITE, &bridge_io) && failed_stage == 0) {
+			failed_stage = 53;
+			saved_errno = errno;
+		}
+	}
+
+	/*
+	 * Restore NIC flag.  On the success path the interface is already
+	 * down, so a write is only needed when it was up before or when a
+	 * failed step may have left it in the wrong state.
+	 */
+	if (initial_config.previousUP || failed_stage != 0) {
+		if (initial_config.previousUP)
+			ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+		else
+			ifr.ifr_flags = ifr.ifr_flags & (~IFF_UP);
+		if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) && failed_stage == 0) {
+			failed_stage = 52;
+			saved_errno = errno;
+		}
+	}
+
+	close(sockfd);
+
+	if (failed_stage != 0) {
+		errno = saved_errno;
+		error_handler(failed_stage);
+	}
+}
+
+/*
+ * Read one config register through /dev/pci and return its value.
+ * Fills in reg and width on *io and issues PCIOCREAD; on failure control
+ * passes to error_handler(stage), which does not return.
+ */
+static u_int32_t
+record_cfg_read(struct pci_io *io, int reg, int width, int stage)
+{
+	io->pi_reg = reg;
+	io->pi_width = width;
+	if (ioctl(fd, PCIOCREAD, io))
+		error_handler(stage);
+	return (io->pi_data);
+}
+
+/*
+ * Record settings before performing any modification.
+ *
+ * Clears initial_config and fills it from the selected bridge and device:
+ * bridge control and device control (low 8 bits each) and, when AER_offs
+ * is non-zero, the AER uncorrectable mask, uncorrectable severity and
+ * correctable mask.  The 16-bit control values and the current status
+ * registers are printed for reference only.  Read failures end in
+ * error_handler() with a stage number in the 2x range.
+ *
+ * The bridge control register is addressed as PCIR_BRIDGECTL_1, the
+ * type-1 (PCI-PCI bridge) header name also used by inject_dev().
+ */
+void record_initial()
+{
+	struct pci_io bridge_io;
+	struct pci_io device_io;
+
+	bzero(&bridge_io, sizeof(bridge_io));
+	bzero(&device_io, sizeof(device_io));
+	bzero(&initial_config, sizeof(initial_config));
+	bridge_io.pi_sel = bridge_sel;
+	device_io.pi_sel = device_sel;
+
+	initial_config.bridgeControl = record_cfg_read(&bridge_io,
+	    PCIR_BRIDGECTL_1, WIDTH_8B, 21);
+	printf("Initial bridge control: %X\n", record_cfg_read(&bridge_io,
+	    PCIR_BRIDGECTL_1, WIDTH_16B, 21));
+
+	initial_config.deviceControl = record_cfg_read(&device_io,
+	    PCIE_offs + PCIER_DEVICE_CTL, WIDTH_8B, 22);
+	printf("Initial device control: %X\n", record_cfg_read(&device_io,
+	    PCIE_offs + PCIER_DEVICE_CTL, WIDTH_16B, 22));
+
+	printf("SecondaryStatus before inject: %X\n",
+	    record_cfg_read(&bridge_io, PCIR_SECSTAT_1, WIDTH_16B, 21));
+
+	if (AER_offs) {
+		printf("AER Uncorrectable error status before inject: %X\n",
+		    record_cfg_read(&device_io,
+		    AER_offs + PCIR_AER_UC_STATUS, WIDTH_32B, 23));
+
+		initial_config.deviceUncErrMask = record_cfg_read(&device_io,
+		    AER_offs + PCIR_AER_UC_MASK, WIDTH_32B, 23);
+
+		initial_config.deviceUncErrSeve = record_cfg_read(&device_io,
+		    AER_offs + PCIR_AER_UC_SEVERITY, WIDTH_32B, 23);
+
+		printf("AER Correctable error status before inject: %X\n",
+		    record_cfg_read(&device_io,
+		    AER_offs + PCIR_AER_COR_STATUS, WIDTH_32B, 23));
+
+		initial_config.deviceCErrMask = record_cfg_read(&device_io,
+		    AER_offs + PCIR_AER_COR_MASK, WIDTH_32B, 23);
+	}
+}
+
+/*
+ * Write one saved value back to a config register and read it back.
+ * The write uses wr_width bytes and the read-back rd_width bytes.  Returns
+ * the value read back; any ioctl failure goes to error_handler(stage),
+ * which does not return.
+ */
+static u_int32_t
+restore_cfg_reg(struct pci_io *io, int reg, int wr_width, int rd_width,
+    u_int32_t data, int stage)
+{
+	io->pi_reg = reg;
+	io->pi_width = wr_width;
+	io->pi_data = data;
+	if (ioctl(fd, PCIOCWRITE, io))
+		error_handler(stage);
+	io->pi_width = rd_width;
+	if (ioctl(fd, PCIOCREAD, io))
+		error_handler(stage);
+	return (io->pi_data);
+}
+
+/*
+ * Restore settings after error-injection, or run-time error.
+ *
+ * Writes the values saved in initial_config back to the bridge control
+ * and device control registers and, when AER_offs is non-zero, to the AER
+ * uncorrectable severity, uncorrectable mask and correctable mask
+ * registers, printing each value as read back.  Failures end in
+ * error_handler() with a stage number in the 4x range.
+ *
+ * The bridge control register is addressed as PCIR_BRIDGECTL_1, the
+ * type-1 (PCI-PCI bridge) header name also used by inject_dev().
+ */
+void restore_initial()
+{
+	struct pci_io bridge_io;
+	struct pci_io device_io;
+
+	bzero(&bridge_io, sizeof(bridge_io));
+	bzero(&device_io, sizeof(device_io));
+	bridge_io.pi_sel = bridge_sel;
+	device_io.pi_sel = device_sel;
+
+	printf("Restored bridge control: %X\n", restore_cfg_reg(&bridge_io,
+	    PCIR_BRIDGECTL_1, WIDTH_8B, WIDTH_16B,
+	    initial_config.bridgeControl, 41));
+
+	printf("Restored device control: %X\n", restore_cfg_reg(&device_io,
+	    PCIE_offs + PCIER_DEVICE_CTL, WIDTH_8B, WIDTH_16B,
+	    initial_config.deviceControl, 42));
+
+	if (AER_offs) {
+		printf("Restored Uncorrectable Error Severity: %X\n",
+		    restore_cfg_reg(&device_io,
+		    AER_offs + PCIR_AER_UC_SEVERITY, WIDTH_32B, WIDTH_32B,
+		    initial_config.deviceUncErrSeve, 43));
+
+		printf("Restored Uncorrectable Error Mask: %X\n",
+		    restore_cfg_reg(&device_io,
+		    AER_offs + PCIR_AER_UC_MASK, WIDTH_32B, WIDTH_32B,
+		    initial_config.deviceUncErrMask, 43));
+
+		printf("Restored Correctable Error Mask: %X\n",
+		    restore_cfg_reg(&device_io,
+		    AER_offs + PCIR_AER_COR_MASK, WIDTH_32B, WIDTH_32B,
+		    initial_config.deviceCErrMask, 43));
+	}
+}
+
+
+/* General helpers */
+
+/*
+ * Handles runtime errors.
+ * try restoring configuration if any modification is made,
+ * or print configuration if cannot restore.
+ *
+ * stage: 0 for a plain quit (no error banner); otherwise a stage number
+ * whose tens digit selects the recovery action:
+ *   3x - registers were modified: call restore_initial();
+ *   4x - restoring failed: print the saved values for manual recovery;
+ *   5x - NIC/bus-master injection failed: print the saved state;
+ *   anything else - nothing was changed.
+ *
+ * Never returns.  The process ends with exit status 0 for stage 0 and a
+ * failure status otherwise, rather than abort(), so that a plain quit does
+ * not produce a core dump.  errno is captured on entry because the
+ * printf() calls that precede strerror() may overwrite it.
+ */
+void error_handler(int stage)
+{
+	int saved_errno;
+
+	saved_errno = errno;
+
+	if (stage) {
+		printf("\n");
+		printf("ERROR! Error stage NO.: %d\n",stage);
+		printf("Translation (errno): %s\n",strerror(saved_errno));
+	}
+
+	printf("\n");
+	switch (stage / 10) {
+	case 3:
+		printf("Try restoring initial configration\n");
+		restore_initial();
+		printf("Restore success\n");
+		break;
+
+	case 4:
+		printf("Restore configration failed.\n");
+		printf("Print stored initial configration:\n");
+		printf("* bridge control [8 bits]: %X\n", initial_config.bridgeControl);
+		printf("* device control [8 bits]: %X\n", initial_config.deviceControl);
+		printf("* device UNC Mask [AER][32 bits]: %X\n", initial_config.deviceUncErrMask);
+		printf("* device UNC Severity [AER][32 bits]: %X\n", initial_config.deviceUncErrSeve);
+		printf("* device COR Mask [AER][32 bits]: %X\n", initial_config.deviceCErrMask);
+		break;
+
+	case 5:
+		printf("Status before error-injection:\n");
+		printf("* NIC device flag UP: %d\n", initial_config.previousUP);
+		printf("* Bridge busMaster enable: %d\n", initial_config.previousBUSMAS);
+		break;
+
+	default:
+		printf("No configuration changed\n");
+		break;
+	}
+	exit(stage == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/*
+ * Return the offset of PCI-Express capability config space.
+ * Return 0 if not found.
+ *
+ * Walks the capability list of the function selected by pi_sel, starting
+ * at the pointer stored in PCIR_CAP_PTR.  The two reserved low bits of
+ * every pointer are masked off before use, and the walk is bounded so that
+ * a looping capability chain cannot hang the tool.  A failed config read
+ * ends in error_handler(101).
+ */
+int findPCIEOffs(struct pcisel *pi_sel)
+{
+	struct pci_io io;
+	int hops;
+	int offset;
+	int pointer;
+
+	bzero(&io, sizeof(io));
+	io.pi_sel = *pi_sel;
+	io.pi_width = WIDTH_8B;
+	io.pi_reg = PCIR_CAP_PTR;
+	if (ioctl(fd, PCIOCREAD, &io))
+		error_handler(101);
+	pointer = io.pi_data & 0xfc;
+
+	offset = 0;
+	io.pi_width = WIDTH_16B;
+	/* At most one capability per dword of the 256-byte config space. */
+	for (hops = 0; hops < PCIR_EXTCAP / 4 &&
+	    pointer > 0 && pointer < PCIR_EXTCAP; hops++) {
+		io.pi_reg = pointer;
+		if (ioctl(fd, PCIOCREAD, &io))
+			error_handler(101);
+		if ((io.pi_data & PCIEM_CAPID) == PCIY_EXPRESS) {
+			offset = pointer;
+			break;
+		}
+		/* High byte of the header is the next-capability pointer. */
+		pointer = ((io.pi_data) >> 8) & 0xfc;
+	}
+	return offset;
+}
+
+/*
+ * Return the offset of PCI-Express AER capability configuration space.
+ * Return 0 if not found.
+ *
+ * Walks the extended capability list of the function selected by pi_sel,
+ * starting at PCIR_EXTCAP.  The two reserved low bits of each next pointer
+ * are masked off, and the walk is bounded so that a looping chain cannot
+ * hang the tool.  A failed config read ends in error_handler(102).
+ */
+int findAEROffs(struct pcisel *pi_sel)
+{
+	struct pci_io io;
+	int hops;
+	int max_hops;
+	int offset;
+	int pointer;
+
+	bzero(&io, sizeof(io));
+	io.pi_width = WIDTH_32B;
+	io.pi_sel = *pi_sel;
+
+	offset = 0;
+	pointer = PCIR_EXTCAP;
+	/* At most one header per dword of extended config space. */
+	max_hops = (PCIR_EXTCAP_MAX + 1 - PCIR_EXTCAP) / 4;
+	for (hops = 0; hops < max_hops &&
+	    pointer < PCIR_EXTCAP_MAX && pointer >= PCIR_EXTCAP; hops++) {
+		io.pi_reg = pointer;
+		if (ioctl(fd, PCIOCREAD, &io))
+			error_handler(102);
+		if (PCI_EXTCAP_ID(io.pi_data) == PCIZ_AER) {
+			offset = pointer;
+			break;
+		}
+		pointer = PCI_EXTCAP_NEXTPTR(io.pi_data) & ~0x3;
+	}
+	return offset;
+}
+
+
+/* Scan and scan helpers */
+
+/*
+ * Record an available injection method in the singly-linked tail queue
+ * dev_loca_head.
+ *
+ * bgidx:  index of the bridge in buffer.
+ * devidx: index of the device under that bridge in buffer.
+ * buffer: device list the two indices refer to.
+ * method: one of the METHOD_* values.
+ *
+ * METHOD_NICFLAG entries are appended at the tail; every other method is
+ * inserted at the head.  The entry is heap-allocated and owned by the
+ * queue afterwards.  If the allocation fails a message is printed and
+ * error_handler(0) is called, the same way scan() reports its own
+ * non-errno failures.
+ */
+void recordAvbl(int bgidx, int devidx, struct pci_conf *buffer, int method)
+{
+	struct dev_loca_entry *entry;
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL) {
+		printf("Out of memory while recording a device\n");
+		error_handler(0);
+		return;
+	}
+
+	entry->bridge_pi_sel = buffer[bgidx].pc_sel;
+	entry->bridge_unit = buffer[bgidx].pd_unit;
+	/* Bounded copies: never write past the destination name fields. */
+	snprintf(entry->bridge_name, sizeof(entry->bridge_name), "%s",
+	    buffer[bgidx].pd_name);
+
+	entry->device_pi_sel = buffer[devidx].pc_sel;
+	entry->device_unit = buffer[devidx].pd_unit;
+	snprintf(entry->device_name, sizeof(entry->device_name), "%s",
+	    buffer[devidx].pd_name);
+
+	entry->method = method;
+	if (method == METHOD_NICFLAG)
+		STAILQ_INSERT_TAIL(&dev_loca_head, entry, next);
+	else
+		STAILQ_INSERT_HEAD(&dev_loca_head, entry, next);
+}
+
+
+/*
+ * Decide which error-injection methods apply to one bridge-device pair and
+ * record each of them with recordAvbl().
+ *
+ * bgidx:     index of the bridge in buffer.
+ * devidx:    index of the candidate device in buffer.
+ * buffer:    device list the two indices refer to.
+ * pcie_offs: offset of the device's PCI-Express capability.
+ */
+void detectAvbl(int bgidx, int devidx, struct pci_conf *buffer, int pcie_offs)
+{
+	struct pci_conf *dev = &buffer[devidx];
+	struct pci_io io;
+	int RBER_enable;
+	int method;
+
+	/* Ethernet controllers qualify for the NIC-flag method. */
+	if (dev->pc_class == PCIC_NETWORK &&
+	    dev->pc_subclass == PCIS_NETWORK_ETHERNET)
+		recordAvbl(bgidx, devidx, buffer, METHOD_NICFLAG);
+
+	/* Read the Role-Based Error Reporting bit from the device capabilities. */
+	bzero(&io, sizeof(io));
+	io.pi_sel = dev->pc_sel;
+	io.pi_width = WIDTH_16B;
+	io.pi_reg = PCIER_DEVICE_CAP + pcie_offs;
+	if (ioctl(fd, PCIOCREAD, &io))
+		error_handler(13);
+	RBER_enable = (io.pi_data & PCIEM_CAP_ROLE_ERR_RPT) >> 15;
+
+	/*
+	 * Retry the same read against function PCI_FUNCMAX.  If it succeeds
+	 * that function is assigned, so the device cannot be used for probing.
+	 */
+	io.pi_sel.pc_func = PCI_FUNCMAX;
+	if (ioctl(fd, PCIOCREAD, &io) == 0)
+		return;
+
+	/* RBER without AER offers no usable probing method. */
+	if (findAEROffs(&dev->pc_sel))
+		method = RBER_enable ? METHOD_RBER_AER : METHOD_AER;
+	else if (!RBER_enable)
+		method = METHOD_NO_RBER;
+	else
+		return;
+	recordAvbl(bgidx, devidx, buffer, method);
+}
+
+/*
+ * Examine every listed device that sits on the bus behind one bridge.
+ *
+ * bgidx:       index of the bridge in buffer.
+ * buffer:      device list returned by the kernel.
+ * num_matches: number of valid entries in buffer.
+ *
+ * Devices that expose a PCI-Express capability are handed to detectAvbl().
+ * A failing PCIOCREAD ioctl is reported through error_handler(12).
+ */
+void scanChild(int bgidx, struct pci_conf *buffer, int num_matches)
+{
+	struct pci_io scan_io;
+	int child, pcie_offs, behind;
+
+	bzero(&scan_io, sizeof(scan_io));
+	scan_io.pi_sel = buffer[bgidx].pc_sel;
+	scan_io.pi_reg = PCIR_PRIBUS_1;
+	scan_io.pi_width = WIDTH_16B;
+	if (ioctl(fd, PCIOCREAD, &scan_io))
+		error_handler(12);
+
+	/*
+	 * The bus to match is the high byte of the 16-bit value read at
+	 * PCIR_PRIBUS_1 (presumably the bridge's secondary bus number).
+	 */
+	behind = scan_io.pi_data >> 8;
+
+	for (child = 0; child < num_matches; child++) {
+		if (buffer[child].pc_sel.pc_bus == behind) {
+			pcie_offs = findPCIEOffs(&buffer[child].pc_sel);
+			if (pcie_offs != 0)
+				detectAvbl(bgidx, child, buffer, pcie_offs);
+		}
+	}
+}
+
+/*
+ * Fetch the device list from the kernel with the PCIOCGETCONF ioctl and
+ * look for usable bridges.
+ *
+ * Only PCI-PCI bridges on bus 0 that expose both a PCI-Express capability
+ * and an AER capability are considered; the devices behind each of them are
+ * then examined by scanChild().  Any status other than
+ * PCI_GETCONF_LAST_DEVICE is handed to error_handler().
+ */
+void scan()
+{
+	struct pci_conf buffer[DEVICE_LIST_LENGTH];
+	struct pci_conf_io pciconfio;
+	struct pci_conf *dev;
+	int i;
+
+	bzero(&pciconfio, sizeof(pciconfio));
+	pciconfio.matches = buffer;
+	pciconfio.match_buf_len = sizeof(buffer);
+
+	printf("Perform device list scan..\n");
+	if (ioctl(fd, PCIOCGETCONF, &pciconfio))
+		error_handler(11);
+
+	switch (pciconfio.status) {
+	case PCI_GETCONF_LAST_DEVICE:
+		break;
+	case PCI_GETCONF_LIST_CHANGED:
+		printf("Device list changes, please retry\n");
+		error_handler(0);
+		/* FALLTHROUGH */
+	case PCI_GETCONF_MORE_DEVS:
+		printf("Increase DEVICE_LIST_LENGTH and retry\n");
+		error_handler(0);
+		/* FALLTHROUGH */
+	case PCI_GETCONF_ERROR:
+	default:
+		error_handler(11);
+		break;
+	}
+
+	for (i = 0; i < pciconfio.num_matches; i++) {
+		dev = &buffer[i];
+		/* Short-circuit order keeps the config-space reads in sequence. */
+		if (dev->pc_sel.pc_bus == 0 &&
+		    dev->pc_class == PCIC_BRIDGE &&
+		    dev->pc_subclass == PCIS_BRIDGE_PCI &&
+		    findPCIEOffs(&dev->pc_sel) &&
+		    findAEROffs(&dev->pc_sel))
+			scanChild(i, buffer, pciconfio.num_matches);
+	}
+}
+
+/* Print the command-line synopsis to stderr, one option per line. */
+void usage()
+{
+	static const char *const synopsis[] = {
+		"usage: InjectAER -a: automatically try error-injection on a device.",
+		" InjectAER -l: list available devices and methods and guide selection.",
+		" InjectAER -h: Usage.",
+	};
+	size_t line;
+
+	for (line = 0; line < sizeof(synopsis) / sizeof(synopsis[0]); line++)
+		fprintf(stderr, "%s\n", synopsis[line]);
+}
Index: tools/tools/pci/README
===================================================================
--- /dev/null
+++ tools/tools/pci/README
@@ -0,0 +1,177 @@
+
+ PCI-E root port bridge error injection tool
+ -------------------------------------------------
+
+* Usage:
+ Compile as C99. Run with root privileges.
+
+ InjectAER -a: automatically try error-injection on a device.
+ InjectAER -l: wizard, list available devices and methods and guide selection.
+ InjectAER -h: Usage.
+
+
+* Meaning of Methods:
+ Probing: Using configuration space request to probe a non-existent function.
+ NIC_flag: Set NIC flag when BusMaster is disabled.
+
+ Details see background info.
+
+
+* Types of error that can be injected:
+ COR: correctable error
+ non-Fatal: non-fatal uncorrectable error
+ Fatal: fatal uncorrectable error
+
+ Details see background info.
+
+
+* Limitation:
+ This program relies on the AER driver support patch on dev/pci/pci_pci.c
+
+ Probing method:
+ I. A PCI-E root port bridge device (CLASS=0x060400) supports AER.
+ II. A PCI-E device on the secondary bus under the bridge.
+ III. The device “supports AER” or “has not implemented Role-Based Error Reporting”.
+ IV. The device does not implement the last function (function 7 under the current standard).
+
+ * Since the bridge only forwards the error, the bridge’s AER status registers will not
+ hold any detailed record of it. Instead, the root port status is expected to record
+ the corresponding error.
+
+
+ NIC_flag method:
+ I. A PCI-E root port bridge device (CLASS=0x060400) supports AER.
+ II. A PCI-E Ethernet device on the secondary bus under the bridge. (Class=0x020000)
+ III. The Ethernet device should be unused (to avoid losing an active connection).
+
+
+* If Runtime Error: Error Stage Description:
+ 0: normal abort
+ ----Main------
+ 1: open device character file
+ 2: Unexpected method selection
+ ----scan------
+ 11: Retrieve device list
+ 12: Using ioctl access device config space
+ 13: Using ioctl access device PCIE extended config space
+ ----Inject_dev(Recording initial settings)------
+ 21: Using ioctl to access bridge config
+ 22: Using ioctl to access device config
+ 23: Using ioctl to access device AER cap
+ ----Inject_dev--------
+ 31: ioctl modify settings on bridge config
+ 32: ioctl modify settings on device config
+ 33: ioctl modify settings on device AER cap
+ 34: sysctl calling device probing
+ 35: After probing: ioctl clearing bits
+ ----Inject_dev(Restore initial settings)------
+ 41: Using ioctl to access bridge config
+ 42: Using ioctl to access device config
+ 43: Using ioctl to access device AER cap
+ ----Inject_if-----------
+ 51: Open socket
+ 52: Using ioctl to access device flag
+ 53: Using ioctl to access bridge config
+ ----General Helpers----
+ 101: ioctl searching for PCIE extended config space
+ 102: ioctl searching for AER cap config space
+
+
+* Background (This program)
+
+ The program can be run with different parameters that lead to automatic mode or
+ wizard mode. In automatic mode, the program will pick the first bridge-device
+ combination in the queue and perform a non-fatal error injection using the first
+ recorded method. In wizard mode, the program will let the user choose which
+ bridge-device combination to perform the injection on and which method to use. If the
+ device is capable of fatal error injection, the program will also prompt the user to choose.
+
+ Before any attempt to make change on device, the program will first save current
+ configurations and print current error status. Configurations are saved in a global
+ structure “initial_config”. After completion of error-injection, the program will then
+ restore previous configurations and clear error status.
+
+ If a runtime error happens, the error handler will print the stage number and the errno translation.
+ If the error happens after some configuration modification to the device,
+ the error handler will try to restore the configuration. If the restoration fails,
+ it will print out all the configuration saved previously.
+
+
+
+
+* Background (Probing)
+
+ Device probing is commonly used when the system initializes and tries to scan all the peripheral
+ devices attached to the system. It is normally done by sending configuration-space read
+ requests to the PCI bus, with specific device (slot) number and function number.
+
+ By default, a request will receive a “Master abort” completion status and return value of all ‘1’s
+ if the request reaches non-existent device or function. In addition to that, a PCI-E device will
+ generate “Unsupported Request” error message, but that will not be signaled to the root to
+ trigger an interrupt by most default settings.
+
+ The goal is to clear the path for the “Unsupported Request” error message to be successfully
+ forwarded to the root. After making many tests on my hardware from Intel, I found the most
+ effective way to achieve the goal is to cause “Unsupported Request” on an end-point device,
+ and let the upstream PCI-E bridges forward the error messages to the root ports, which will then
+ generate the interrupt.
+
+ End-point devices vary a lot in their behavior across different hardware. Based on all the tests I
+ made on my available hardware, there are two important factors that affect the behavior of
+ a device when it receives an invalid configuration space access request: the implementation of
+ Role-Based Error Reporting (RBER) and AER.
+
+ I. The device implements neither RBER nor AER.
+ In this case, the invalid configuration space request will be treated as “unsupported
+ request” non-fatal error. By enabling the “unsupported request” report and non-
+ fatal error report bits in device control section, the device will send a non-fatal
+ uncorrectable error message to upstream bridge, “ERR_NONFATAL”.
+
+ II. The device implements AER but not RBER.
+ This case is similar to the first case that the error message can be sent by enabling
+ error reporting bits. Better than the first case, I can change the severity of
+ “unsupported request” in the AER uncorrectable error severity section, and will be
+ able to send “ERR_NONFATAL” and “ERR_FATAL” message to the bridge.
+
+ III. The device implements both RBER and AER.
+ Starting from PCI-E specification ver. 1.1, PCI-E devices are required to implement
+ RBER. In this case, with RBER, the device will “be smart” and change the type of
+ error signaling based on the error detection agent. When the device receives an
+ invalid configuration space access request, the device with RBER will treat the
+ “unsupported request” non-fatal error as a masked “advisory non-fatal” correctable
+ error, in order to avoid disturbing the probing process. With AER support, I can clear
+ the mask for “advisory non-fatal” error on the AER correctable error mask section,
+ to let the device send an “ERR_COR” message. In addition to that, I can avoid the
+ participation of RBER by changing the severity of “unsupported request” error to
+ “FATAL”, and the device will send an “ERR_FATAL” message.
+
+ IV. The device implements RBER but not AER
+ Unfortunately, this is a dead end. Without AER, I cannot change the severity of
+ “unsupported request” to “fatal”, or let the device report a correctable error. The
+ error signaling flow chart in the specification (p. 291 [1]) also implies this result.
+
+ * Some thoughts:
+ All cases in this method rely on a bridge device and a child device,
+ where the child sends the error message and the bridge forwards it. Technically we should be able
+ to apply the same trick to a bridge device, which could be better because we would not need to
+ deal with the children. However, I ran many tests on my PCI-E root port bridges but none of
+ them showed any sign of generating an error in response to an invalid configuration space request.
+ According to the flowchart (p. 291 [1]) there should be at least a correctable error recorded in
+ AER status, but my devices disagree with that.
+
+
+
+* Background (NIC_flag)
+
+ I can disable the “Bus Master Enable” bit on the bridge, and then change the flag on the
+ downstream NIC device. After that, the bridge will regard all the I/O requests from the device as
+ “unsupported request”. This error is generated on the bridge, so not only does the root port status
+ record the error, but the AER status of the bridge also reflects the error detail.
+
+ * I have not done many tests on this method. The only problem
+ I noticed is that the NIC may become unstable and lose link after performing the injection.
+
+* Reference
+ [1]: PCI-Express Base Specification Revision 1.1
+
+$FreeBSD$
\ No newline at end of file
Index: tools/tools/pci/ReadAER.c
===================================================================
--- /dev/null
+++ tools/tools/pci/ReadAER.c
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 2016 Isilon LLC, EMC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+
+
+/*
+ * One AER error record as fetched by main() through the
+ * hw.pci.pcib_error_records sysctl.
+ * NOTE(review): the layout presumably has to match the kernel's
+ * struct AER_error_data field for field - confirm against the driver.
+ */
+struct AER_error_data {
+ /* Printed by printThings() as "Error NO.". */
+ int error_number;
+ /*
+ * Not printed by printThings().
+ * NOTE(review): presumably a kernel address that must not be
+ * dereferenced in userland - confirm.
+ */
+ const char *dev_name;
+ /* Printed by printThings() as the pcib unit number. */
+ int unit;
+ /* The remaining fields are printed by printThings() in decimal and hex. */
+ int pcie_cap_status;
+ int unc_err_status;
+ int unc_err_seve;
+ int c_err_status;
+ int AER_ECRC;
+ int root_err_status;
+ int err_src_ID;
+ int c_err_src_ID;
+};
+
+/* Prints every field of one record except dev_name; defined after main(). */
+void printThings(struct AER_error_data error_data);
+
+/*
+ * Print the number of recorded AER errors and, when an index is given,
+ * fetch and print that record.
+ *
+ * argv[1]: decimal index of the record to fetch, or -1 for the newest one.
+ *
+ * Returns EXIT_FAILURE when a sysctl call fails or the index is not a valid
+ * number in [-1, count - 1].  A missing argument still returns 0, as the
+ * error count has been reported by then.
+ */
+int main(int argc, char *argv[])
+{
+	struct AER_error_data error_data;
+	int name[4];
+	size_t miblen;
+	size_t size;
+	char *end;
+	long requested;
+	int count;
+	int error;
+
+	size = sizeof(count);
+	error = sysctlbyname("hw.pci.pcib_error_count", &count, &size, NULL, 0);
+	if (error != 0) {
+		printf("Error: %d\n", error);
+		printf("Errno Translate: %s\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	printf("Total %d error(s).\n", count);
+	printf("Old records may be overwritten, depends on driver's record length limit.\n");
+
+	if (argc != 2) {
+		printf("arg error: expect one argument: index of error to fetch, or -1 to fetch last record\n");
+		return 0;
+	}
+
+	/* strtol instead of atoi, so that non-numeric input is rejected. */
+	errno = 0;
+	requested = strtol(argv[1], &end, 10);
+	if (end == argv[1] || *end != '\0' || errno == ERANGE ||
+	    count <= 0 || requested < -1 || requested > count - 1) {
+		printf("Invalid argument\n");
+		return EXIT_FAILURE;
+	}
+	if (requested == -1)
+		requested = count - 1;
+
+	/*
+	 * Resolve the node name, leaving one slot free for the record index,
+	 * and use the length the kernel reports instead of assuming it.
+	 */
+	miblen = sizeof(name) / sizeof(name[0]) - 1;
+	error = sysctlnametomib("hw.pci.pcib_error_records", name, &miblen);
+	if (error != 0) {
+		printf("Error: %d\n", error);
+		printf("Errno Translate: %s\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+	name[miblen] = (int)requested;
+
+	memset(&error_data, 0, sizeof(error_data));
+	size = sizeof(error_data);
+
+	error = sysctl(name, (u_int)(miblen + 1), &error_data, &size, NULL, 0);
+	if (error != 0) {
+		printf("Error: %d\n", error);
+		printf("Errno Translate: %s\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+	printThings(error_data);
+	return 0;
+}
+
+/*
+ * Print one value in decimal followed by upper-case hexadecimal, then a
+ * newline.  The cast gives %X the unsigned argument it requires.
+ */
+void printDecHex(int value)
+{
+	printf(" %d, HEX: %X\n", value, (unsigned int)value);
+}
+
+/*
+ * Print one error record to stdout: a header with the error number and the
+ * pcib unit, then each status field in decimal and hexadecimal.
+ */
+void printThings(struct AER_error_data error_data)
+{
+	const struct {
+		const char *label;
+		int value;
+	} rows[] = {
+		{ "Get: pcie_cap_status:", error_data.pcie_cap_status },
+		{ "Get: unc_err_status:", error_data.unc_err_status },
+		{ "Get: unc_err_seve:", error_data.unc_err_seve },
+		{ "Get: c_err_status:", error_data.c_err_status },
+		{ "Get: AER_ECRC:", error_data.AER_ECRC },
+		{ "Get: root_err_status:", error_data.root_err_status },
+		{ "Get: err_src_ID:", error_data.err_src_ID },
+		{ "Get: c_err_src_ID:", error_data.c_err_src_ID },
+	};
+	size_t row;
+
+	printf("-------------------------------------\n");
+	printf("Error NO.%d\n", error_data.error_number);
+	printf("Error Source: pcib%d\n", error_data.unit);
+	printf("-------\n");
+
+	for (row = 0; row < sizeof(rows) / sizeof(rows[0]); row++) {
+		fputs(rows[row].label, stdout);
+		printDecHex(rows[row].value);
+	}
+
+	printf("------------------------------------\n");
+}
+
+
+
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jan 21, 9:32 AM (10 h, 37 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27799262
Default Alt Text
D7697.diff (48 KB)
Attached To
Mode
D7697: Add AER register reporting support via sysctl
Attached
Detach File
Event Timeline
Log In to Comment