Page MenuHomeFreeBSD

D7697.diff
No OneTemporary

D7697.diff

Index: sys/dev/pci/pci_pci.c
===================================================================
--- sys/dev/pci/pci_pci.c
+++ sys/dev/pci/pci_pci.c
@@ -43,6 +43,10 @@
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/mutex.h>
+#include <sys/lock.h>
+#include <machine/pci_cfgreg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
@@ -108,6 +112,11 @@
DEVMETHOD_END
};
+static MALLOC_DEFINE(M_AER, "AER", "Advanced Error Report");
+static STAILQ_HEAD(, AER_node) AER_root;
+static int AER_counter = 0;
+static struct mtx AER_lock;
+
static devclass_t pcib_devclass;
DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc));
@@ -903,6 +912,333 @@
return(ENXIO);
}
+
+/*
+ * sysctl handler: copy out one stored AER error record.
+ *
+ * The single trailing OID component (name[0]) selects the record by its
+ * zero-based index.  Only indices within the newest AER_REC_MAXLEN
+ * records are accepted.
+ *
+ * Returns EINVAL for a malformed or out-of-range request, ENOENT when the
+ * index is in range but no node with that index is on the list, and
+ * otherwise the result of SYSCTL_OUT().
+ */
+static int
+sysctl_pcib_error_records(SYSCTL_HANDLER_ARGS)
+{
+	struct AER_error_data snapshot;
+	struct AER_node *node;
+	int *name;
+	int report_idx;
+	int found;
+
+	/* Validate the component count before touching name[0]. */
+	if (arg2 != 1)
+		return (EINVAL);
+	name = (int *)arg1;
+	report_idx = name[0];
+	found = 0;
+
+	mtx_lock_spin(&AER_lock);
+	/* Input index cannot exceed #error actually recv */
+	if (report_idx < 0 || report_idx >= AER_counter ||
+	    report_idx < (AER_counter - AER_REC_MAXLEN)) {
+		mtx_unlock_spin(&AER_lock);
+		return (EINVAL);
+	}
+	STAILQ_FOREACH(node, &AER_root, next) {
+		if (node->index == report_idx) {
+			/*
+			 * Copy while the lock is held: the interrupt handler
+			 * may unlink this node once the lock is dropped.
+			 */
+			snapshot = *node->AER_error_data_container;
+			found = 1;
+			break;
+		}
+	}
+	mtx_unlock_spin(&AER_lock);
+
+	/* The copy-out happens on a private copy, outside the spin lock. */
+	if (!found)
+		return (ENOENT);
+	return (SYSCTL_OUT(req, &snapshot, sizeof(snapshot)));
+}
+
+
+/*
+ * sysctl handler: An optional feature to clear the error records.
+ * using sysctl call to hw.pci.pcib_clear_error_records
+ * with parameter name[]
+ *
+ */
+/*
+static int
+sysctl_pcib_clear_error_records(SYSCTL_HANDLER_ARGS)
+{
+ int *name;
+ int clear;
+ name = (int *)arg1;
+ clear = name[0];
+
+ if (clear == 1) {
+ mtx_lock_spin(&AER_lock);
+ STAILQ_INIT(&AER_root);
+ AER_counter = 0;
+ mtx_unlock_spin(&AER_lock);
+ }
+ else
+ return (EINVAL);
+
+ return 0;
+}
+
+SYSCTL_NODE(_hw_pci, OID_AUTO, pcib_clear_error_records, CTLFLAG_RD, sysctl_pcib_clear_error_records,
+ "PCI Bridge Clear Error records");
+*/
+
+
+/*
+ * sysctl handler: read a PCI config register, for error injection purposes.
+ *
+ * Expects five trailing OID components: bus, slot, function, register
+ * offset and access width in bytes.  Returns EINVAL when the component
+ * count is wrong or the width is not 1, 2 or 4; otherwise copies out the
+ * int returned by pci_cfgregread().
+ */
+static int
+sysctl_pcib_probe(SYSCTL_HANDLER_ARGS)
+{
+	int *name, bus, slot, func, reg, bytes, result;
+
+	if (arg2 != 5)
+		return (EINVAL);
+
+	name = (int *)arg1;
+	bus = name[0];
+	slot = name[1];
+	func = name[2];
+	reg = name[3];
+	bytes = name[4];
+
+	/* Config space accesses are only defined for 1, 2 and 4 byte widths. */
+	if (bytes != 1 && bytes != 2 && bytes != 4)
+		return (EINVAL);
+
+	result = pci_cfgregread(bus, slot, func, reg, bytes);
+
+	return (SYSCTL_OUT(req, &result, sizeof(result)));
+}
+
+
+/*
+ * Setup sysctl on hw.pci:
+ *  - pcib_error_count:   read-only view of AER_counter.
+ *  - pcib_error_records: node served by sysctl_pcib_error_records(); the
+ *                        trailing OID component is the record index.
+ *  - pcib_probe:         node served by sysctl_pcib_probe(); the trailing
+ *                        OID components are bus, slot, func, reg, bytes.
+ */
+SYSCTL_INT(_hw_pci, OID_AUTO, pcib_error_count, CTLFLAG_RD, &AER_counter, 0,
+ "PCI Bridge Intr count");
+SYSCTL_NODE(_hw_pci, OID_AUTO, pcib_error_records, CTLFLAG_RD, sysctl_pcib_error_records,
+ "PCI Bridge Error records");
+SYSCTL_NODE(_hw_pci, OID_AUTO, pcib_probe, CTLFLAG_RD, sysctl_pcib_probe,
+ "PCI Bridge Probing for Error Injection");
+
+/*
+ * Interrupt handler.
+ *
+ * Snapshots the PCI-E device status and, when the bridge has the AER
+ * extended capability, the AER status registers; clears the status bits
+ * it read; then appends the snapshot to the global record list.
+ *
+ * The registers are read into a stack copy first, so the status bits are
+ * cleared even when no memory is available for a list node.  Once the
+ * list already holds AER_REC_MAXLEN records the oldest node is unlinked
+ * and reused for the new record instead of being leaked.  AER_counter is
+ * only advanced together with the insertion, under AER_lock, so every
+ * index a reader can see is backed by a node.
+ */
+static void
+pcib_intr_handler(void *arg)
+{
+	struct AER_error_data record;
+	struct AER_error_data *fresh_data;
+	struct AER_node *fresh;
+	struct AER_node *node;
+	struct pcib_softc *sc;
+	device_t dev;
+	int offsAER;
+	int offsPCIE;
+	int temp;
+	int error;
+	int recycled;
+
+	sc = arg;
+	dev = sc->dev;
+	device_printf(dev, ": New Interrupt Received\n");
+
+	bzero(&record, sizeof(record));
+
+	/* PCI-E error status; skipped if the capability cannot be located */
+	if (pci_find_cap(dev, PCIY_EXPRESS, &offsPCIE) == 0) {
+		temp = pci_read_config(dev, offsPCIE + PCIER_DEVICE_STA, 2);
+		record.pcie_cap_status = temp;
+		if (temp >= 0)
+			pci_write_config(dev, offsPCIE + PCIER_DEVICE_STA, BCLR_PCIE_STATUS, 1);
+	}
+
+	error = pci_find_extcap(dev, PCIZ_AER, &offsAER);
+	if (error) {
+		device_printf(dev, ": AER not supported\n");
+	} else {
+		/* Record source tag (error_number is filled in under the lock) */
+		record.dev_name = device_get_name(dev);
+		record.unit = device_get_unit(dev);
+
+		/* Uncorrectable error reg */
+		temp = pci_read_config(dev, offsAER + PCIR_AER_UC_STATUS, 4);
+		record.unc_err_status = temp;
+		if (temp >= 0)
+			pci_write_config(dev, offsAER + PCIR_AER_UC_STATUS, BCLR_AER_UNC_STATUS, 4);
+
+		/* Uncorrectable error severity */
+		record.unc_err_seve = pci_read_config(dev, offsAER + PCIR_AER_UC_SEVERITY, 4);
+
+		/* Correctable error reg */
+		temp = pci_read_config(dev, offsAER + PCIR_AER_COR_STATUS, 4);
+		record.c_err_status = temp;
+		if (temp >= 0)
+			pci_write_config(dev, offsAER + PCIR_AER_COR_STATUS, BCLR_AER_C_STATUS, 4);
+
+		/* ECRC */
+		record.AER_ECRC = pci_read_config(dev, offsAER + PCIR_AER_CAP_CONTROL, 4);
+
+		/* Root Port error status */
+		temp = pci_read_config(dev, offsAER + PCIR_AER_ROOTERR_STATUS, 4);
+		record.root_err_status = temp;
+		if (temp >= 0)
+			pci_write_config(dev, offsAER + PCIR_AER_ROOTERR_STATUS, BCLR_AER_RTSTATUS, 4);
+
+		/* ERROR Source */
+		record.err_src_ID = pci_read_config(dev, offsAER + PCIR_AER_ERR_SOURCE_ID, 2);
+		record.c_err_src_ID = pci_read_config(dev, offsAER + PCIR_AER_COR_SOURCE_ID, 2);
+	}
+
+	/*
+	 * Pre-allocate a node without sleeping (an interrupt handler must
+	 * not wait for memory) and outside the spin lock.  It is only
+	 * consumed while the list is still growing.
+	 */
+	fresh = malloc(sizeof(*fresh), M_AER, M_NOWAIT | M_ZERO);
+	fresh_data = malloc(sizeof(*fresh_data), M_AER, M_NOWAIT | M_ZERO);
+	if (fresh == NULL || fresh_data == NULL) {
+		free(fresh, M_AER);
+		free(fresh_data, M_AER);
+		fresh = NULL;
+		fresh_data = NULL;
+	}
+
+	/* Save Node */
+	recycled = 0;
+	mtx_lock_spin(&AER_lock);
+	if (AER_counter >= AER_REC_MAXLEN && !STAILQ_EMPTY(&AER_root)) {
+		/* List full: reuse the oldest node and its data buffer. */
+		node = STAILQ_FIRST(&AER_root);
+		STAILQ_REMOVE_HEAD(&AER_root, next);
+		recycled = 1;
+	} else if (fresh != NULL) {
+		node = fresh;
+		node->AER_error_data_container = fresh_data;
+		fresh = NULL;
+		fresh_data = NULL;
+	} else {
+		node = NULL;
+	}
+	if (node != NULL) {
+		AER_counter += 1;
+		if (error == 0)
+			record.error_number = AER_counter;
+		node->index = AER_counter - 1;
+		*node->AER_error_data_container = record;
+		STAILQ_INSERT_TAIL(&AER_root, node, next);
+	}
+	mtx_unlock_spin(&AER_lock);
+
+	/* Release the pre-allocation if it was not needed (NULL is a no-op). */
+	free(fresh, M_AER);
+	free(fresh_data, M_AER);
+
+	if (recycled)
+		printf("PCI-E Bridge AER Record list full, remove oldest record.\n");
+	if (node == NULL)
+		device_printf(dev, "AER record dropped: out of memory\n");
+}
+
+
+/*
+ * Setup Interrupt & Reg interrupt handler.
+ *
+ * Enables error reporting on the bridge (AER root error command register
+ * and the PCI-E device control enables), allocates one MSI-X/MSI vector
+ * or falls back to the legacy interrupt (rid 0), and registers
+ * pcib_intr_handler().  Failures are reported with device_printf() and
+ * leave the bridge without an error interrupt.
+ */
+static void
+pcib_setup_intr_common(struct pcib_softc *sc)
+{
+	int error;
+	int offsAER;
+	int offsPCIE;
+	int dev_ctl;
+	int intr_count;
+	device_t dev = sc->dev;
+
+	/*
+	 * The record list and its lock are shared by every bridge and by
+	 * the sysctl handlers, so initialise them exactly once, and before
+	 * any handler that uses them can run.
+	 * NOTE(review): assumes attach calls are serialised — confirm.
+	 */
+	if (!mtx_initialized(&AER_lock)) {
+		mtx_init(&AER_lock, "AER Record", NULL, MTX_SPIN);
+		STAILQ_INIT(&AER_root);
+	}
+
+	if (pci_find_cap(dev, PCIY_EXPRESS, &offsPCIE) != 0) {
+		device_printf(dev, "Interrupt not registered: PCI-E capability not found\n");
+		return;
+	}
+	error = pci_find_extcap(dev, PCIZ_AER, &offsAER);
+
+	/* Enable device error report */
+	if (error != 0) {
+		device_printf(dev, "Interrupt not registered: AER not supported\n");
+		return;
+	}
+	pci_write_config(dev, offsAER + PCIR_AER_ROOTERR_CMD, 7, 4);
+	dev_ctl = pci_read_config(dev, offsPCIE + PCIER_DEVICE_CTL, 1);
+	pci_write_config(dev, offsPCIE + PCIER_DEVICE_CTL, (dev_ctl | PCIEM_CTL_COR_ENABLE |
+	    PCIEM_CTL_NFER_ENABLE | PCIEM_CTL_FER_ENABLE | PCIEM_CTL_URR_ENABLE), 1);
+	device_printf(dev,"Error Report Enabled\n");
+
+	/* Setup Interrupt & resource allocation */
+	sc->sc_irq_rid = -1;
+
+	intr_count = 1;
+	if (pci_msix_count(dev) == 1) {
+		error = pci_alloc_msix(dev, &intr_count);
+		if (!error)
+			sc->sc_irq_rid = 1;
+	} else if (pci_msi_count(dev) > 0) {
+		error = pci_alloc_msi(dev, &intr_count);
+		if (!error)
+			sc->sc_irq_rid = 1;
+	}
+	/* No message-signalled vector: fall back to rid 0 */
+	if (sc->sc_irq_rid < 0)
+		sc->sc_irq_rid = 0;
+
+	sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+	    &sc->sc_irq_rid, RF_ACTIVE);
+
+	if (!sc->sc_irq_res) {
+		device_printf(dev, "Unable to allocate interrupt resource\n");
+		if (sc->sc_irq_rid == 1)
+			pci_release_msi(dev);
+		return;
+	}
+
+	/* Reg intr handler */
+	error = bus_setup_intr(dev, sc->sc_irq_res, INTR_TYPE_MISC,
+	    NULL, pcib_intr_handler, sc, &sc->sc_irq_cookie);
+	if (error) {
+		device_printf(dev, "Unable to register interrupt handler\n");
+		bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, sc->sc_irq_res);
+		/* Do not leave a stale pointer to the released resource. */
+		sc->sc_irq_res = NULL;
+		if (sc->sc_irq_rid == 1)
+			pci_release_msi(dev);
+		return;
+	}
+
+	device_printf(dev, "Interrupt registered\n");
+}
+
+
+/*
+ * Tear down the interrupt handler and release the interrupt resources,
+ * and add the interrupt release to the bus_generic_detach path.
+ *
+ * Left commented out because no way to test it has been found yet.
+ */
+/*
+static int
+pcib_release_intr_common(struct pcib_softc *sc)
+{
+ device_t dev;
+ int error;
+
+ dev = sc->dev;
+ error = bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_cookie);
+ if (error) {
+ device_printf(dev, "Unable to teardown interrupt\n");
+ return error;
+ }
+
+ error = bus_free_resource(dev, SYS_RES_IRQ, sc->sc_irq_res);
+ if (error) {
+ device_printf(dev, "Unable to free interrupt resouce\n");
+ return error;
+ }
+
+ error = pci_release_msi(dev);
+ if (error)
+ device_printf(dev, "Unable to release interrupt\n");
+
+ return error;
+}
+
+int
+pcib_detach(device_t dev)
+{
+ int error;
+ struct pcib_softc *sc;
+
+ sc = device_get_softc(dev);
+ error = bus_generic_detach(dev);
+ if (error)
+ return (error);
+
+ if (sc->sc_irq_res)
+ error = pcib_release_intr_common(sc);
+
+ return error;
+}
+*/
+
+
void
pcib_attach_common(device_t dev)
{
@@ -914,6 +1250,8 @@
sc = device_get_softc(dev);
sc->dev = dev;
+ pcib_setup_intr_common(sc);
+
/*
* Get current bridge configuration.
*/
@@ -942,6 +1280,7 @@
SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus",
CTLFLAG_RD, &sc->bus.sub, 0, "Subordinate bus number");
+
/*
* Quirk handling.
*/
Index: sys/dev/pci/pcib_private.h
===================================================================
--- sys/dev/pci/pcib_private.h
+++ sys/dev/pci/pcib_private.h
@@ -69,6 +69,36 @@
#define WIN_MEM 0x2
#define WIN_PMEM 0x4
+/*
+ * PCI-Bridge AER support
+ */
+/* AER Bits clear settings based on PCIE base spec. 1.1 */
+#define BCLR_PCIE_STATUS 0xf
+#define BCLR_AER_UNC_STATUS 0x1ff030
+#define BCLR_AER_C_STATUS 0x31c1
+#define BCLR_AER_RTSTATUS 127
+#define AER_REC_MAXLEN 255
+
+/* One captured error report, as filled in by pcib_intr_handler(). */
+struct AER_error_data {
+ int error_number; /* Nth error, counting from 1 (value of AER_counter at capture) */
+ const char *dev_name; /* device_get_name() of the reporting bridge */
+ int unit; /* device_get_unit() of the reporting bridge */
+ int pcie_cap_status; /* PCI-E capability device status (PCIER_DEVICE_STA) */
+ int unc_err_status; /* AER uncorrectable error status */
+ int unc_err_seve; /* AER uncorrectable error severity */
+ int c_err_status; /* AER correctable error status */
+ int AER_ECRC; /* AER capabilities and control register (holds the ECRC bits) */
+ int root_err_status; /* AER root error status */
+ int err_src_ID; /* uncorrectable error source ID */
+ int c_err_src_ID; /* correctable error source ID */
+};
+
+/* List node linking one error report into the global record list. */
+struct AER_node {
+ int index; /* zero-based record index used for sysctl lookup */
+ struct AER_error_data *AER_error_data_container; /* the captured report */
+ STAILQ_ENTRY(AER_node) next; /* singly-linked tail queue linkage */
+};
+
struct pcib_window {
pci_addr_t base; /* base address */
pci_addr_t limit; /* topmost address */
@@ -125,6 +155,10 @@
uint16_t secstat; /* secondary bus status register */
uint16_t bridgectl; /* bridge control register */
uint8_t seclat; /* secondary bus latency timer */
+
+ int sc_irq_rid; /* Interrupt handler*/
+ struct resource *sc_irq_res;
+ void *sc_irq_cookie;
};
#define PCIB_SUPPORTED_ARI_VER 1
Index: tools/tools/pci/InjectAER.c
===================================================================
--- /dev/null
+++ tools/tools/pci/InjectAER.c
@@ -0,0 +1,898 @@
+/*-
+ * Copyright (c) 2016 Isilon LLC, EMC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <dev/pci/pcireg.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/pciio.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h> /* O_RDWR */
+#include <sys/queue.h> /* STAILQ */
+#include <stdlib.h> /* malloc */
+#include <errno.h> /* strerror */
+#include <string.h> /* strcpy */
+#include <net/if.h> /* ifreq */
+#include <unistd.h> /* close */
+
+#define DEVICE_LIST_LENGTH 255
+#define INJ_SYSCTL_NAMELEN 8
+#define INJ_SYSCTL_PATH "hw.pci.pcib_probe"
+#define PATH_DEVPCI "/dev/pci"
+#define PCIEM_CAPID 0xFF
+#define PCIR_EXTCAP_MAX 0xFFF
+
+#define METHOD_RBER_AER 1
+#define METHOD_AER 2
+#define METHOD_NO_RBER 3
+#define METHOD_NICFLAG 4
+
+#define WIDTH_32B 4
+#define WIDTH_16B 2
+#define WIDTH_8B 1
+
+void usage();
+void inject_dev(int fatal);
+void inject_if();
+void scan();
+void error_handler(int stage);
+void record_initial();
+void restore_initial();
+int findPCIEOffs(struct pcisel *pi_sel);
+int findAEROffs(struct pcisel *pi_sel);
+
+
+/*
+ * Register and interface state saved before injection so that it can be
+ * restored (or printed) afterwards.
+ */
+struct config {
+ int bridgeControl; /* bridge control register, 8-bit read */
+ int deviceControl; /* PCI-E device control register, 8-bit read */
+ int deviceUncErrMask; /* AER uncorrectable error mask */
+ int deviceUncErrSeve; /* AER uncorrectable error severity */
+ int deviceCErrMask; /* AER correctable error mask */
+ int previousUP; /* 1 if the NIC had IFF_UP set before injection */
+ int previousBUSMAS; /* bridge command register value before injection */
+};
+
+/* One bridge/device pair found by the scan, with the method usable on it. */
+struct dev_loca_entry {
+ struct pcisel bridge_pi_sel; /* PCI selector of the bridge */
+ char bridge_name[PCI_MAXNAMELEN + 1]; /* bridge driver name */
+ int bridge_unit; /* bridge driver unit number */
+ struct pcisel device_pi_sel; /* PCI selector of the device */
+ char device_name[PCI_MAXNAMELEN + 1]; /* device driver name */
+ int device_unit; /* device driver unit number */
+ int method; /* one of the METHOD_* constants */
+ STAILQ_ENTRY(dev_loca_entry) next; /* tail queue linkage */
+};
+
+/* Candidate bridge/device pairs collected by the scan. */
+static STAILQ_HEAD(,dev_loca_entry) dev_loca_head;
+/* Descriptor for PATH_DEVPCI, opened in main() and used by every ioctl. */
+int fd;
+
+/* Global VARs will be defined after selecting device */
+struct pcisel bridge_sel; /* selector of the chosen bridge */
+struct pcisel device_sel; /* selector of the chosen device */
+struct config initial_config; /* state saved before injection */
+char device_name[IFNAMSIZ]; /* "<driver name><unit>" of the chosen device */
+int PCIE_offs; /* offset of the device's PCI-E capability */
+int AER_offs; /* offset of the device's AER capability, 0 if none */
+
+
+
+/*
+ * Read one integer from stdin into *value.
+ * Returns 1 on success and 0 when the input line was not a number (the
+ * offending line is discarded so the caller can prompt again).  Quits via
+ * error_handler(0) on end of input, since no valid answer can follow.
+ */
+static int
+read_choice(int *value)
+{
+	int c, got;
+
+	got = scanf("%d", value);
+	if (got == EOF)
+		error_handler(0);
+	if (got != 1) {
+		while ((c = getchar()) != '\n' && c != EOF)
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Read user input, process automatic error-injection,
+ * or guide user through running error-injection.
+ *
+ * Exactly one option is expected: -a injects on the first scanned
+ * candidate without prompting, -l lists the candidates and asks which
+ * one to use, -h (or anything else) prints the usage text.
+ */
+int main(int argc, char **argv)
+{
+	int parm;		/* int, not char: getopt() returns -1 at the end */
+	int automode;
+	int count, select_index, select_fatal, method;
+	struct dev_loca_entry *entry;
+
+	if (argc != 2) {
+		usage();
+		return 0;
+	}
+	automode = -1;
+	while ((parm = getopt(argc, argv, "alh")) != -1) {
+		switch(parm) {
+		case 'a':
+			automode = 1;
+			break;
+
+		case 'l':
+			automode = 0;
+			break;
+
+		case 'h':
+		default:
+			usage();
+			return 0;
+		}
+	}
+	/* The single argument was not an option at all. */
+	if (automode < 0) {
+		usage();
+		return 0;
+	}
+
+	bzero(&bridge_sel, sizeof(bridge_sel));
+	bzero(&device_sel, sizeof(device_sel));
+	fd = open(PATH_DEVPCI, O_RDWR);
+	if (fd < 0) {
+		printf("Unable to open %s.\n", PATH_DEVPCI);
+		error_handler (1);
+	}
+
+	STAILQ_INIT(&dev_loca_head);
+
+	scan();
+
+	printf("Scan result:\n");
+	if (STAILQ_EMPTY(&dev_loca_head)) {
+		printf("No potentially error-inject-avilabile device found\n");
+		error_handler(0);
+	}
+
+	if (automode) {
+		entry = STAILQ_FIRST(&dev_loca_head);
+		printf("Performing auto error-injetion on: \n");
+		printf("Bridge=%s%d@%d:%d:%d:%d ",entry->bridge_name, entry->bridge_unit,
+		    entry->bridge_pi_sel.pc_domain,entry->bridge_pi_sel.pc_bus,
+		    entry->bridge_pi_sel.pc_dev,entry->bridge_pi_sel.pc_func);
+		printf("Device=%s%d@%d:%d:%d:%d ",entry->device_name, entry->device_unit,
+		    entry->device_pi_sel.pc_domain,entry->device_pi_sel.pc_bus,
+		    entry->device_pi_sel.pc_dev,entry->device_pi_sel.pc_func);
+		printf("Method=%d\n", entry->method);
+		bridge_sel = entry->bridge_pi_sel;
+		device_sel = entry->device_pi_sel;
+		/* Bounded: name plus unit may not fit in IFNAMSIZ bytes. */
+		snprintf(device_name, sizeof(device_name), "%s%d",
+		    entry->device_name, entry->device_unit);
+		select_fatal = 0;
+		PCIE_offs = findPCIEOffs(&device_sel);
+		AER_offs = findAEROffs(&device_sel);
+		if (entry->method == METHOD_NICFLAG)
+			inject_if();
+		else
+			inject_dev(select_fatal);
+		close(fd);
+		printf("Error injection complete.\n");
+		return 0;
+	}
+
+	count = 0;
+	STAILQ_FOREACH(entry, &dev_loca_head, next) {
+		printf("Index: %d ", count);
+		printf("Bridge: %s%d@%d:%d:%d:%d ",entry->bridge_name, entry->bridge_unit,
+		    entry->bridge_pi_sel.pc_domain,entry->bridge_pi_sel.pc_bus,
+		    entry->bridge_pi_sel.pc_dev,entry->bridge_pi_sel.pc_func);
+		printf("Device: %s%d@%d:%d:%d:%d ",entry->device_name, entry->device_unit,
+		    entry->device_pi_sel.pc_domain,entry->device_pi_sel.pc_bus,
+		    entry->device_pi_sel.pc_dev,entry->device_pi_sel.pc_func);
+		printf("Method=");
+		switch (entry->method) {
+		case (METHOD_RBER_AER):
+			printf ("Probing: COR/Fatal");
+			break;
+		case (METHOD_AER):
+			printf ("Probing: non-Fatal/Fatal");
+			break;
+		case (METHOD_NO_RBER):
+			printf ("Probing: non-fatal");
+			break;
+		case (METHOD_NICFLAG):
+			printf ("NIC flag: non-fatal");
+			break;
+		default:
+			error_handler(2);
+		}
+		printf("\n");
+		count ++;
+	}
+
+	printf("Select a combination to try error injection.\n");
+	select_index = 0;
+	while(1) {
+		printf("Input index (0~%d): ", count - 1);
+		if (read_choice(&select_index) &&
+		    select_index < count && select_index >= 0)
+			break;
+	}
+
+	printf("Get: %d\n", select_index);
+
+	count = 0;
+	method = 0;
+	STAILQ_FOREACH(entry, &dev_loca_head, next) {
+		if (count == select_index) {
+			bridge_sel = entry->bridge_pi_sel;
+			device_sel = entry->device_pi_sel;
+			snprintf(device_name, sizeof(device_name), "%s%d",
+			    entry->device_name, entry->device_unit);
+			method = entry->method;
+			break;
+		}
+		count ++;
+	}
+
+	printf("Selected: ");
+	printf("Index=%d ", count);
+	printf("DeviceName=%s ", device_name);
+	printf("Method=%d\n", method);
+
+	select_fatal = 0;
+	printf ("\n");
+	printf ("Expected: ");
+	switch (method) {
+	case METHOD_RBER_AER:
+		while (1) {
+			printf ("Select error type ");
+			printf ("(0=correctable, 1=fatal): ");
+			if (read_choice(&select_fatal) &&
+			    (select_fatal == 0 || select_fatal == 1))
+				break;
+		}
+		if (select_fatal)
+			printf ("Uncorrectable fatal error ");
+		else
+			printf ("Correctable error ");
+		printf ("record on root port status ");
+		break;
+
+	case METHOD_AER:
+		while (1) {
+			printf ("Select error type ");
+			printf ("(0=non-fatal, 1=fatal): ");
+			if (read_choice(&select_fatal) &&
+			    (select_fatal == 0 || select_fatal == 1))
+				break;
+		}
+		if (select_fatal)
+			printf ("Uncorrectable fatal error ");
+		else
+			printf ("Uncorrectable non-fatal error ");
+		printf ("record on root port status ");
+		break;
+
+	case METHOD_NO_RBER:
+		printf ("Uncorrectable non-fatal error ");
+		printf ("record on root port status ");
+		break;
+
+	case METHOD_NICFLAG:
+		printf ("Uncorrectable non-fatal error ");
+		printf ("record on root port status, and\n");
+		printf ("Error 'Unsupported Request' record on ");
+		printf ("bridge AER Uncorrectable error status\n");
+		break;
+
+	default:
+		error_handler (2);
+		break;
+	}
+	printf ("after injection.\n");
+	printf ("\n");
+
+
+	PCIE_offs = findPCIEOffs(&device_sel);
+	AER_offs = findAEROffs(&device_sel);
+	printf("Device: PCI-E Extd Config Space @ %X \n", PCIE_offs);
+	if (AER_offs)
+		printf("Device: AER Cap Config Space @ %X \n", AER_offs);
+	else
+		printf("Device: AER Not supported\n");
+
+	if (method == METHOD_NICFLAG)
+		inject_if();
+	else
+		inject_dev(select_fatal);
+
+	close(fd);
+
+	printf("Error injection complete.\n");
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * Attempt to probe non-existent function to trigger UR error.
+ * error message is generated by device and forwarded by bridge.
+ *
+ * fatal: non-zero marks Unsupported Request as fatal in the device's AER
+ * uncorrectable severity register, zero marks it as non-fatal.  Only
+ * relevant when the device has the AER capability (AER_offs != 0).
+ *
+ * The initial register values are recorded first and restored at the
+ * end; any failure goes through error_handler(), which does not return.
+ */
+void inject_dev(int fatal)
+{
+	int name[INJ_SYSCTL_NAMELEN];
+	int buffer;
+	size_t len, size;
+	struct pci_io bridge_io;
+	struct pci_io device_io;
+	bzero(&bridge_io, sizeof(struct pci_io));
+	bzero(&device_io, sizeof(struct pci_io));
+	bridge_io.pi_sel = bridge_sel;
+	device_io.pi_sel = device_sel;
+
+	/* Record initial value */
+	record_initial();
+
+	/* Clear MasterAbort & Error Received */
+	bridge_io.pi_width = WIDTH_16B;
+	bridge_io.pi_reg = PCIR_SECSTAT_1;
+	bridge_io.pi_data = PCIM_STATUS_RMABORT | PCIM_STATUS_SERR;
+	if (ioctl(fd, PCIOCWRITE, &bridge_io))
+		error_handler(31);
+
+	/* Enable error forwarding */
+	bridge_io.pi_width = WIDTH_8B;
+	bridge_io.pi_reg = PCIR_BRIDGECTL_1;
+	bridge_io.pi_data = initial_config.bridgeControl | PCIB_BCR_SERR_ENABLE;
+	if (ioctl(fd, PCIOCWRITE, &bridge_io))
+		error_handler(31);
+
+	/* Enable error report */
+	device_io.pi_width = WIDTH_8B;
+	device_io.pi_reg = PCIE_offs+PCIER_DEVICE_CTL;
+	device_io.pi_data = initial_config.deviceControl |
+	    PCIEM_CTL_URR_ENABLE |
+	    PCIEM_CTL_FER_ENABLE |
+	    PCIEM_CTL_NFER_ENABLE |
+	    PCIEM_CTL_COR_ENABLE;
+	if (ioctl(fd, PCIOCWRITE, &device_io))
+		error_handler(32);
+
+	/* AER registers */
+	if (AER_offs) {
+		device_io.pi_width = WIDTH_32B;
+
+		/* Clear UR status */
+		device_io.pi_reg = AER_offs + PCIR_AER_UC_STATUS;
+		device_io.pi_data = PCIM_AER_UC_UNSUPPORTED_REQUEST;
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(33);
+
+		/* Clear AdvsErr status */
+		device_io.pi_reg = AER_offs + PCIR_AER_COR_STATUS;
+		device_io.pi_data = PCIM_AER_COR_ADVISORY_NF_ERROR;
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(33);
+
+		/*
+		 * Clear Uncorrectable error mask for UR.  Start from the
+		 * saved *uncorrectable* mask; using the correctable mask
+		 * here would overwrite unrelated uncorrectable mask bits.
+		 */
+		device_io.pi_reg = AER_offs + PCIR_AER_UC_MASK;
+		device_io.pi_data = (initial_config.deviceUncErrMask &
+		    (~PCIM_AER_UC_UNSUPPORTED_REQUEST));
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(33);
+
+		/* Clear Correctable error mask for advs. unc. error */
+		device_io.pi_reg = AER_offs + PCIR_AER_COR_MASK;
+		device_io.pi_data = (initial_config.deviceCErrMask &
+		    (~PCIM_AER_COR_ADVISORY_NF_ERROR));
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(33);
+
+		/* Fatal or Non-fatal error */
+		device_io.pi_reg = AER_offs + PCIR_AER_UC_SEVERITY;
+		if (fatal)
+			device_io.pi_data = (initial_config.deviceUncErrSeve |
+			    PCIM_AER_UC_UNSUPPORTED_REQUEST);
+		else
+			device_io.pi_data = (initial_config.deviceUncErrSeve &
+			    (~PCIM_AER_UC_UNSUPPORTED_REQUEST));
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(33);
+		printf("Uncorrectable Error Severity set to: %X\n", device_io.pi_data);
+	}
+
+
+	/*
+	 * Probe non-existent function.  The five arguments (bus, slot,
+	 * func, reg, bytes) are appended after however many components
+	 * the sysctl name resolved to, rather than at fixed positions.
+	 */
+	len = INJ_SYSCTL_NAMELEN;
+	if (sysctlnametomib(INJ_SYSCTL_PATH, name, &len))
+		error_handler(34);
+	if (len + 5 > INJ_SYSCTL_NAMELEN) {
+		errno = EINVAL;
+		error_handler(34);
+	}
+	name[len + 0] = device_io.pi_sel.pc_bus;
+	name[len + 1] = device_io.pi_sel.pc_dev;
+	name[len + 2] = PCI_FUNCMAX;
+	name[len + 3] = 0;
+	name[len + 4] = WIDTH_32B;
+
+	size = sizeof(int);
+	if (sysctl(name, (u_int)(len + 5), &buffer, &size, NULL, 0))
+		error_handler(34);
+
+	/* Verify MasterAbort & Error Received */
+	bridge_io.pi_width = WIDTH_16B;
+	bridge_io.pi_reg = PCIR_SECSTAT_1;
+	if (ioctl(fd, PCIOCREAD, &bridge_io))
+		error_handler(35);
+	printf("Bridge sec. status after injection: %X\n", bridge_io.pi_data);
+
+	if (AER_offs) {
+		device_io.pi_width = WIDTH_32B;
+
+		/* Clear UR status */
+		device_io.pi_reg = AER_offs + PCIR_AER_UC_STATUS;
+		device_io.pi_data = PCIM_AER_UC_UNSUPPORTED_REQUEST;
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(35);
+
+		/* Clear AdvsErr status */
+		device_io.pi_reg = AER_offs + PCIR_AER_COR_STATUS;
+		device_io.pi_data = PCIM_AER_COR_ADVISORY_NF_ERROR;
+		if (ioctl(fd, PCIOCWRITE, &device_io))
+			error_handler(35);
+	}
+
+	/* Clear MasterAbort & Error Received */
+	bridge_io.pi_data = PCIM_STATUS_RMABORT | PCIM_STATUS_SERR;
+	if (ioctl(fd, PCIOCWRITE, &bridge_io))
+		error_handler(35);
+
+	/* Restore */
+	restore_initial();
+}
+
+
+/*
+ * Disabling busmaster and set NIC flag to trigger UR errors.
+ * Error is generated by bridge.
+ *
+ * Sequence: confirm with the user if the interface is up, check that the
+ * bridge currently has bus mastering enabled, bring the interface down,
+ * disable bus mastering on the bridge, toggle IFF_UP on and off, then
+ * restore bus mastering and the original IFF_UP state.  Failures go
+ * through error_handler(), which does not return.
+ */
+void inject_if()
+{
+	int sockfd;
+	int input;		/* int so that EOF from getchar() is detectable */
+	int toggle_failed;
+	int saved_errno;
+	struct pci_io bridge_io;
+	struct ifreq ifr;
+
+	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sockfd < 0)
+		error_handler(51);
+
+	bzero(&initial_config, sizeof(initial_config));
+	bzero(&bridge_io, sizeof(bridge_io));
+	bridge_io.pi_sel = bridge_sel;
+	bzero(&ifr, sizeof(ifr));
+	strcpy(ifr.ifr_name, device_name);
+	if (ioctl(sockfd, SIOCGIFFLAGS, &ifr))
+		error_handler (52);
+	printf("Device %s current lower-flags: %X\n", device_name, ifr.ifr_flags);
+	if ((ifr.ifr_flags & IFF_UP) == IFF_UP) {
+		initial_config.previousUP = 1;
+		printf("This device is currently in up state, ");
+		printf("continue may bring link down.\n");
+		while (1) {
+			printf("Continue (y/n) ? ");
+			input = getchar();
+			if (input == 'y')
+				break;
+			/* End of input can never become a 'y': treat as 'n'. */
+			else if (input == 'n' || input == EOF)
+				error_handler (0);
+		}
+	}
+
+	/*
+	 * Check the bridge's BusMaster bit before changing anything, so
+	 * that quitting here really leaves the interface untouched.
+	 */
+	bridge_io.pi_width = WIDTH_8B;
+	bridge_io.pi_reg = PCIR_COMMAND;
+	if (ioctl(fd, PCIOCREAD, &bridge_io))
+		error_handler(53);
+	initial_config.previousBUSMAS = bridge_io.pi_data;
+	if (!(initial_config.previousBUSMAS & PCIM_CMD_BUSMASTEREN)) {
+		printf ("BusMaster is previously disabled:");
+		printf ("Unexpected settings. Quit\n");
+		error_handler(0);
+	}
+
+	/* Disable NIC first */
+	ifr.ifr_flags = ifr.ifr_flags & (~IFF_UP);
+	if (ioctl(sockfd, SIOCSIFFLAGS, &ifr))
+		error_handler (53);
+
+	/* Disable Bridge's BusMaster */
+	bridge_io.pi_data = bridge_io.pi_data & (~PCIM_CMD_BUSMASTEREN);
+	if (ioctl(fd, PCIOCWRITE, &bridge_io))
+		error_handler(53);
+
+	/*
+	 * Toggle NIC flag.  A failure is only remembered here: BusMaster
+	 * must be re-enabled before error_handler() ends the program.
+	 */
+	toggle_failed = 0;
+	ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+	if (ioctl(sockfd, SIOCSIFFLAGS, &ifr)) {
+		toggle_failed = 1;
+	} else {
+		ifr.ifr_flags = ifr.ifr_flags & (~IFF_UP);
+		if (ioctl(sockfd, SIOCSIFFLAGS, &ifr))
+			toggle_failed = 1;
+	}
+	saved_errno = errno;
+
+	/* Bring back BusMaster */
+	bridge_io.pi_data = initial_config.previousBUSMAS;
+	if (ioctl(fd, PCIOCWRITE, &bridge_io))
+		error_handler(53);
+
+	if (toggle_failed) {
+		errno = saved_errno;
+		error_handler (52);
+	}
+
+	/* Restore NIC flag */
+	if (initial_config.previousUP) {
+		ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+		if (ioctl(sockfd, SIOCSIFFLAGS, &ifr))
+			error_handler (52);
+	}
+
+	close(sockfd);
+}
+
+/*
+ * Record settings before performing any modification.
+ *
+ * Zeroes initial_config, then saves the 8-bit bridge control and device
+ * control values and, when AER_offs is non-zero, the AER uncorrectable
+ * mask, uncorrectable severity and correctable mask.  The 16-bit reads
+ * and the status reads are only printed, not saved.
+ * Any failing ioctl ends the program through error_handler(2x).
+ *
+ * NOTE(review): this reads PCIR_BRIDGECTL_2 while inject_dev() writes
+ * PCIR_BRIDGECTL_1 - confirm both names refer to the same offset.
+ */
+void record_initial()
+{
+ struct pci_io bridge_io;
+ struct pci_io device_io;
+ bzero(&bridge_io, sizeof(bridge_io));
+ bzero(&device_io, sizeof(device_io));
+ bzero(&initial_config, sizeof(initial_config));
+ bridge_io.pi_sel = bridge_sel;
+ device_io.pi_sel = device_sel;
+
+ /* Bridge control: save the 8-bit value, print the 16-bit value */
+ bridge_io.pi_width = WIDTH_8B;
+ bridge_io.pi_reg = PCIR_BRIDGECTL_2;
+ if (ioctl(fd, PCIOCREAD, &bridge_io))
+ error_handler(21);
+ initial_config.bridgeControl = bridge_io.pi_data;
+ bridge_io.pi_width = WIDTH_16B;
+ if (ioctl(fd, PCIOCREAD, &bridge_io))
+ error_handler(21);
+ printf("Initial bridge control: %X\n",bridge_io.pi_data);
+
+ /* Device control: save the 8-bit value, print the 16-bit value */
+ device_io.pi_width = WIDTH_8B;
+ device_io.pi_reg = PCIE_offs + PCIER_DEVICE_CTL;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(22);
+ initial_config.deviceControl = device_io.pi_data;
+ device_io.pi_width = WIDTH_16B;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(22);
+ printf("Initial device control: %X\n",device_io.pi_data);
+
+ /* Secondary status is printed for reference only */
+ bridge_io.pi_width = WIDTH_16B;
+ bridge_io.pi_reg = PCIR_SECSTAT_1;
+ if (ioctl(fd, PCIOCREAD, &bridge_io))
+ error_handler(21);
+ printf("SecondaryStatus before inject: %X\n", bridge_io.pi_data);
+
+ /* AER registers exist only when the capability was found */
+ if (AER_offs) {
+ device_io.pi_width = WIDTH_32B;
+ device_io.pi_reg = AER_offs + PCIR_AER_UC_STATUS;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(23);
+ printf("AER Uncorrectable error status before inject: %X\n",
+ device_io.pi_data);
+
+ device_io.pi_reg = AER_offs + PCIR_AER_UC_MASK;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(23);
+ initial_config.deviceUncErrMask = device_io.pi_data;
+
+ device_io.pi_reg = AER_offs + PCIR_AER_UC_SEVERITY;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(23);
+ initial_config.deviceUncErrSeve = device_io.pi_data;
+
+ device_io.pi_reg = AER_offs + PCIR_AER_COR_STATUS;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(23);
+ printf("AER Correctable error status before inject: %X\n",
+ device_io.pi_data);
+
+ device_io.pi_reg = AER_offs + PCIR_AER_COR_MASK;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(23);
+ initial_config.deviceCErrMask = device_io.pi_data;
+ }
+}
+
+/*
+ * Restore settings after error-injection, or run-time error.
+ *
+ * Writes back the values saved in initial_config (bridge control, device
+ * control and, when AER_offs is non-zero, the AER uncorrectable severity,
+ * uncorrectable mask and correctable mask), reading each register back
+ * and printing it.  Any failing ioctl ends the program through
+ * error_handler(4x), which prints the saved values instead.
+ */
+void restore_initial()
+{
+ struct pci_io bridge_io;
+ struct pci_io device_io;
+ bzero(&bridge_io, sizeof(bridge_io));
+ bzero(&device_io, sizeof(device_io));
+ bridge_io.pi_sel = bridge_sel;
+ device_io.pi_sel = device_sel;
+
+ /* Bridge control: 8-bit write, 16-bit read-back for display */
+ bridge_io.pi_width = WIDTH_8B;
+ bridge_io.pi_reg = PCIR_BRIDGECTL_2;
+ bridge_io.pi_data = initial_config.bridgeControl;
+ if (ioctl(fd, PCIOCWRITE, &bridge_io))
+ error_handler(41);
+ bridge_io.pi_width = WIDTH_16B;
+ if (ioctl(fd, PCIOCREAD, &bridge_io))
+ error_handler(41);
+ printf("Restored bridge control: %X\n",bridge_io.pi_data);
+
+ /* Device control: 8-bit write, 16-bit read-back for display */
+ device_io.pi_width = WIDTH_8B;
+ device_io.pi_reg = PCIE_offs + PCIER_DEVICE_CTL;
+ device_io.pi_data = initial_config.deviceControl;
+ if (ioctl(fd, PCIOCWRITE, &device_io))
+ error_handler(42);
+ device_io.pi_width = WIDTH_16B;
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(42);
+ printf("Restored device control: %X\n",device_io.pi_data);
+
+ /* AER registers exist only when the capability was found */
+ if (AER_offs) {
+ device_io.pi_width = WIDTH_32B;
+ device_io.pi_reg = AER_offs + PCIR_AER_UC_SEVERITY;
+ device_io.pi_data = initial_config.deviceUncErrSeve;
+ if (ioctl(fd, PCIOCWRITE, &device_io))
+ error_handler(43);
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(43);
+ printf("Restored Uncorrectable Error Severity: %X\n",
+ device_io.pi_data);
+
+ device_io.pi_reg = AER_offs + PCIR_AER_UC_MASK;
+ device_io.pi_data = initial_config.deviceUncErrMask;
+ if (ioctl(fd, PCIOCWRITE, &device_io))
+ error_handler(43);
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(43);
+ printf("Restored Uncorrectable Error Mask: %X\n",
+ device_io.pi_data);
+
+ device_io.pi_reg = AER_offs + PCIR_AER_COR_MASK;
+ device_io.pi_data = initial_config.deviceCErrMask;
+ if (ioctl(fd, PCIOCWRITE, &device_io))
+ error_handler(43);
+ if (ioctl(fd, PCIOCREAD, &device_io))
+ error_handler(43);
+ printf("Restored Correctable Error Mask: %X\n",
+ device_io.pi_data);
+ }
+}
+
+
+/* General helpers */
+
+/*
+ * Handles runtime errors.
+ * try restoring configuration if any modification is made,
+ * or print configuration if cannot restore.
+ *
+ * stage: 0 for a quit without error (nothing is reported); otherwise an
+ * error code whose tens digit selects the action: 3x restores the saved
+ * configuration, 4x prints it because restoring failed, 5x prints the
+ * NIC/bus-master state, anything else reports that nothing was changed.
+ *
+ * Never returns: ends the process with EXIT_SUCCESS for stage 0 and
+ * EXIT_FAILURE otherwise, after flushing the messages printed so far.
+ */
+void error_handler(int stage)
+{
+	/* Capture errno first; the printf() calls below may overwrite it. */
+	int saved_errno = errno;
+
+	if (stage) {
+		printf("\n");
+		printf("ERROR! Error stage NO.: %d\n",stage);
+		printf("Translation (errno): %s\n",strerror(saved_errno));
+	}
+
+	printf("\n");
+	switch (stage / 10) {
+	case 3:
+		printf("Try restoring initial configration\n");
+		restore_initial();
+		printf("Restore success\n");
+		break;
+
+	case 4:
+		printf("Restore configration failed.\n");
+		printf("Print stored initial configration:\n");
+		printf("* bridge control [8 bits]: %X\n", initial_config.bridgeControl);
+		printf("* device control [8 bits]: %X\n", initial_config.deviceControl);
+		printf("* device UNC Mask [AER][32 bits]: %X\n", initial_config.deviceUncErrMask);
+		printf("* device UNC Severity [AER][32 bits]: %X\n", initial_config.deviceUncErrSeve);
+		printf("* device COR Mask [AER][32 bits]: %X\n", initial_config.deviceCErrMask);
+		break;
+
+	case 5:
+		printf("Status before error-injection:\n");
+		printf("* NIC device flag UP: %d\n", initial_config.previousUP);
+		printf("* Bridge busMaster enable: %d\n", initial_config.previousBUSMAS);
+		break;
+
+	default:
+		printf("No configuration changed\n");
+		break;
+	}
+
+	/*
+	 * exit() rather than abort(): these are expected runtime failures
+	 * or user-requested quits, not program bugs, and the diagnostics
+	 * above must reach the user even when stdout is not a terminal.
+	 */
+	fflush(stdout);
+	exit(stage ? EXIT_FAILURE : EXIT_SUCCESS);
+}
+
+/*
+ * Return the offset of PCI-Express capability config space.
+ * Return 0 if not found.
+ *
+ * Follows the capability list starting at PCIR_CAP_PTR; each entry is
+ * read as 16 bits, with the capability ID in the low byte and the
+ * pointer to the next entry in the high byte.  The walk is bounded so a
+ * malformed, looping list cannot hang the tool.  A failing ioctl ends
+ * the program through error_handler(101).
+ */
+int findPCIEOffs(struct pcisel *pi_sel)
+{
+	struct pci_io io;
+	int pointer;
+	int hops;
+	int offset = 0;
+
+	bzero(&io, sizeof(io));
+	io.pi_width = WIDTH_8B;
+	io.pi_reg = PCIR_CAP_PTR;
+	io.pi_sel = *pi_sel;
+	if (ioctl(fd, PCIOCREAD, &io))
+		error_handler(101);
+	pointer = io.pi_data;
+
+	io.pi_width = WIDTH_16B;
+	/* Every entry lies below PCIR_EXTCAP, so that also bounds the hops. */
+	for (hops = 0; hops < PCIR_EXTCAP &&
+	    pointer < PCIR_EXTCAP && pointer > 0; hops++) {
+		io.pi_reg = pointer;
+		if (ioctl(fd, PCIOCREAD, &io))
+			error_handler(101);
+		if ((io.pi_data & PCIEM_CAPID) == PCIY_EXPRESS) {
+			offset = pointer;
+			break;
+		}
+		pointer = (io.pi_data) >> 8;
+	}
+	/* Return the match, not the walk cursor, so "not found" is always 0. */
+	return offset;
+}
+
+/*
+ * Locate the PCI-Express AER extended capability of a device.
+ * Yields its config-space offset, or 0 when the list holds no AER entry.
+ *
+ * The extended capability list is followed from PCIR_EXTCAP with 32-bit
+ * reads for as long as the cursor stays inside
+ * [PCIR_EXTCAP, PCIR_EXTCAP_MAX).  A failing ioctl ends the program
+ * through error_handler(102).
+ */
+int findAEROffs(struct pcisel *pi_sel)
+{
+	struct pci_io cfg;
+	int where;
+
+	bzero(&cfg, sizeof(cfg));
+	cfg.pi_sel = *pi_sel;
+	cfg.pi_width = WIDTH_32B;
+
+	for (where = PCIR_EXTCAP;
+	    where >= PCIR_EXTCAP && where < PCIR_EXTCAP_MAX;
+	    where = PCI_EXTCAP_NEXTPTR(cfg.pi_data)) {
+		cfg.pi_reg = where;
+		if (ioctl(fd, PCIOCREAD, &cfg))
+			error_handler(102);
+		if (PCI_EXTCAP_ID(cfg.pi_data) == PCIZ_AER)
+			return where;
+	}
+	return 0;
+}
+
+
+/* Scan and scan helpers */
+
+/*
+ * Record an available bridge/device/method combination in the
+ * singly-linked tail queue dev_loca_head.
+ *
+ * bgidx and devidx index the bridge and the device in buffer; method is
+ * one of the METHOD_* values.  METHOD_NICFLAG entries are appended and
+ * every other method is inserted at the head, so the other methods are
+ * found first when the queue is read from the front.
+ *
+ * The entry is heap-allocated and owned by the queue.  If the allocation
+ * fails, a message is printed and error_handler(0) is called.
+ */
+void recordAvbl(int bgidx, int devidx, struct pci_conf *buffer, int method)
+{
+	struct dev_loca_entry *entry;
+
+	entry = malloc(sizeof(*entry));
+	if (entry == NULL) {
+		printf("Out of memory while recording device\n");
+		error_handler(0);
+		return;
+	}
+
+	entry->bridge_pi_sel = buffer[bgidx].pc_sel;
+	/* Bounded copies: never write past the entry's name fields. */
+	snprintf(entry->bridge_name, sizeof(entry->bridge_name), "%s",
+	    buffer[bgidx].pd_name);
+	entry->bridge_unit = buffer[bgidx].pd_unit;
+
+	entry->device_pi_sel = buffer[devidx].pc_sel;
+	snprintf(entry->device_name, sizeof(entry->device_name), "%s",
+	    buffer[devidx].pd_name);
+	entry->device_unit = buffer[devidx].pd_unit;
+
+	entry->method = method;
+	if (method == METHOD_NICFLAG)
+		STAILQ_INSERT_TAIL(&dev_loca_head, entry, next);
+	else
+		STAILQ_INSERT_HEAD(&dev_loca_head, entry, next);
+}
+
+
+/*
+ * Work out which error-injection methods apply to one bridge-device
+ * combination and record each of them with recordAvbl().
+ *
+ * bgidx and devidx index the bridge and the device in buffer; pcie_offs
+ * is the offset of the device's PCI-Express capability.
+ */
+void detectAvbl(int bgidx, int devidx, struct pci_conf *buffer, int pcie_offs)
+{
+	struct pci_conf *dev = &buffer[devidx];
+	struct pci_io probe;
+	int has_rber;
+
+	/* Check1: an Ethernet NIC can always take the NIC-flag method. */
+	if (dev->pc_class == PCIC_NETWORK &&
+	    dev->pc_subclass == PCIS_NETWORK_ETHERNET)
+		recordAvbl(bgidx, devidx, buffer, METHOD_NICFLAG);
+
+	/* Check2: Role-Based Error Reporting bit of the device capabilities. */
+	bzero(&probe, sizeof(probe));
+	probe.pi_sel = dev->pc_sel;
+	probe.pi_width = WIDTH_16B;
+	probe.pi_reg = PCIER_DEVICE_CAP + pcie_offs;
+	if (ioctl(fd, PCIOCREAD, &probe) != 0)
+		error_handler(13);
+	has_rber = (probe.pi_data & PCIEM_CAP_ROLE_ERR_RPT) >> 15;
+
+	/*
+	 * The probing methods need function PCI_FUNCMAX to be unassigned.
+	 * If a read of that function succeeds, the device cannot be used.
+	 */
+	probe.pi_sel.pc_func = PCI_FUNCMAX;
+	if (ioctl(fd, PCIOCREAD, &probe) == 0)
+		return;
+
+	if (findAEROffs(&dev->pc_sel) != 0)
+		recordAvbl(bgidx, devidx, buffer,
+		    has_rber ? METHOD_RBER_AER : METHOD_AER);
+	else if (!has_rber)
+		recordAvbl(bgidx, devidx, buffer, METHOD_NO_RBER);
+}
+
+/*
+ * Scan through the children of a bridge.
+ *
+ * bgidx indexes the bridge in buffer, which holds num_matches entries.
+ * Every entry that sits on the bus behind the bridge, in the bridge's
+ * own PCI domain, and has a PCI-Express capability is handed to
+ * detectAvbl().  A failed ioctl is reported through error_handler(12).
+ */
+void scanChild(int bgidx, struct pci_conf *buffer, int num_matches)
+{
+	struct pci_io scan_io;
+	int pcie_offs, bus, i;
+
+	bzero(&scan_io, sizeof(scan_io));
+	scan_io.pi_width = WIDTH_16B;
+	scan_io.pi_reg = PCIR_PRIBUS_1;
+	scan_io.pi_sel = buffer[bgidx].pc_sel;
+	if (ioctl(fd, PCIOCREAD, &scan_io))
+		error_handler(12);
+	/*
+	 * The 16-bit read starts at the primary bus register; its upper
+	 * byte is the next register, the bridge's secondary bus number.
+	 */
+	bus = scan_io.pi_data >> 8;
+
+	for (i = 0; i < num_matches; i++) {
+		/*
+		 * Bus numbers are only unique within a PCI domain, so a
+		 * device counts as a child only if the domain matches too.
+		 */
+		if (buffer[i].pc_sel.pc_domain !=
+		    buffer[bgidx].pc_sel.pc_domain)
+			continue;
+		if (buffer[i].pc_sel.pc_bus != bus)
+			continue;
+		pcie_offs = findPCIEOffs(&buffer[i].pc_sel);
+		if (pcie_offs)
+			detectAvbl(bgidx, i, buffer, pcie_offs);
+	}
+}
+
+/*
+ * Fetch the PCI device list from the kernel with PCIOCGETCONF, then pass
+ * every bus-0 PCI-PCI bridge that has both a PCI-Express capability and
+ * an AER capability to scanChild() so its children can be examined.
+ */
+void scan()
+{
+	struct pci_conf buffer[DEVICE_LIST_LENGTH];
+	struct pci_conf_io pciconfio;
+	struct pci_conf *candidate;
+	int idx;
+
+	bzero(&pciconfio, sizeof(pciconfio));
+	pciconfio.matches = buffer;
+	pciconfio.match_buf_len = sizeof(buffer);
+	printf("Perform device list scan..\n");
+	if (ioctl(fd, PCIOCGETCONF, &pciconfio) != 0)
+		error_handler(11);
+
+	/*
+	 * Only PCI_GETCONF_LAST_DEVICE means the whole list was returned.
+	 * error_handler() ends in abort(), so the error cases are not
+	 * expected to continue into the next case.
+	 */
+	switch (pciconfio.status) {
+	case PCI_GETCONF_LAST_DEVICE:
+		break;
+	case PCI_GETCONF_LIST_CHANGED:
+		printf("Device list changes, please retry\n");
+		error_handler(0);
+		/* FALLTHROUGH */
+	case PCI_GETCONF_MORE_DEVS:
+		printf("Increase DEVICE_LIST_LENGTH and retry\n");
+		error_handler(0);
+		/* FALLTHROUGH */
+	case PCI_GETCONF_ERROR:
+	default:
+		error_handler(11);
+		break;
+	}
+
+	for (idx = 0; idx < pciconfio.num_matches; idx++) {
+		candidate = &buffer[idx];
+		/* The capability lookups run only for bus-0 PCI-PCI bridges. */
+		if (candidate->pc_sel.pc_bus == 0 &&
+		    candidate->pc_class == PCIC_BRIDGE &&
+		    candidate->pc_subclass == PCIS_BRIDGE_PCI &&
+		    findPCIEOffs(&candidate->pc_sel) &&
+		    findAEROffs(&candidate->pc_sel))
+			scanChild(idx, buffer, pciconfio.num_matches);
+	}
+}
+
+/* Print the command-line synopsis, one option per line, to stderr. */
+void usage()
+{
+	static const char *const synopsis[] = {
+		"usage: InjectAER -a: automatically try error-injection on a device.",
+		" InjectAER -l: list available devices and methods and guide selection.",
+		" InjectAER -h: Usage.",
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(synopsis) / sizeof(synopsis[0]); k++)
+		fprintf(stderr, "%s\n", synopsis[k]);
+}
Index: tools/tools/pci/README
===================================================================
--- /dev/null
+++ tools/tools/pci/README
@@ -0,0 +1,177 @@
+
+ PCI-E root port bridge error injection tool
+ -------------------------------------------------
+
+* Usage:
+ Compile as C99. Run with root privileges.
+
+ InjectAER -a: automatically try error-injection on a device.
+ InjectAER -l: wizard, list available devices and methods and guide selection.
+ InjectAER -h: Usage.
+
+
+* Meaning of Methods:
+ Probing: Using configuration space request to probe a non-existent function.
+ NIC_flag: Set NIC flag when BusMaster is disabled.
+
+ Details see background info.
+
+
+* Types of error that can be injected:
+ COR: correctable error
+ non-Fatal: non-fatal uncorrectable error
+ Fatal: fatal uncorrectable error
+
+ Details see background info.
+
+
+* Limitation:
+ This program relies on the AER driver support patch on dev/pci/pci_pci.c
+
+ Probing method:
+ I. A PCI-E root port bridge device (CLASS=0x060400) supports AER.
+ II. A PCI-E device on the secondary bus under the bridge.
+ III. The device “supports AER” or “has not implemented Role-Based Error Reporting”.
+ IV. The device does not implement the last function (function 7 in the current standard).
+
+ * Since the bridge only forwards the error, the bridge’s AER status registers will not
+ hold any detailed record. Instead, the root port status is expected to record
+ the corresponding error.
+
+
+ NIC_flag method:
+ I. A PCI-E root port bridge device (CLASS=0x060400) supports AER.
+ II. A PCI-E Ethernet device on the secondary bus under the bridge. (Class=0x020000)
+ III. The Ethernet device should be unused (so that no active connection is lost).
+
+
+* If Runtime Error: Error Stage Description:
+ 0: normal abort
+ ----Main------
+ 1: open device character file
+ 2: Unexpected method selection
+ ----scan------
+ 11: Retrieve device list
+ 12: Using ioctl to access device config space
+ 13: Using ioctl to access device PCIE extended config space
+ ----Inject_dev(Recording initial settings)------
+ 21: Using ioctl to access bridge config
+ 22: Using ioctl to access device config
+ 23: Using ioctl to access device AER cap
+ ----Inject_dev--------
+ 31: ioctl modify settings on bridge config
+ 32: ioctl modify settings on device config
+ 33: ioctl modify settings on device AER cap
+ 34: sysctl calling device probing
+ 35: After probing: ioctl clearing bits
+ ----Inject_dev(Restore initial settings)------
+ 41: Using ioctl to access bridge config
+ 42: Using ioctl to access device config
+ 43: Using ioctl to access device AER cap
+ ----Inject_if-----------
+ 51: Open socket
+ 52: Using ioctl to access device flag
+ 53: Using ioctl to access bridge config
+ ----General Helpers----
+ 101: ioctl searching for PCI-Express capability config space
+ 102: ioctl searching for AER cap config space
+
+
+* Background (This program)
+
+ The program can be run with different parameters that lead to automatic mode or
+ wizard mode. In automatic mode, the program will pick the first bridge-device
+ combination in the queue and perform a non-fatal error injection using the first
+ recorded method. In wizard mode, the program will let the user choose which
+ bridge-device combination to perform injection on and which method to use. If a
+ fatal error can be injected on the device, the program will also prompt the user to choose.
+
+ Before any attempt to make change on device, the program will first save current
+ configurations and print current error status. Configurations are saved in a global
+ structure “initial_config”. After completion of error-injection, the program will then
+ restore previous configurations and clear error status.
+
+ If a runtime error happens, the error handler will print the stage number and errno translation.
+ If the error happens after some configuration modification to the device,
+ the error handler will try to restore the configuration. If the restoration fails,
+ it will print out all the configuration saved previously.
+
+
+
+
+* Background (Probing)
+
+ Device probing is commonly used when the system initializes and tries to scan all the peripheral
+ devices attached to the system. It is normally done by sending configuration-space read
+ requests to the PCI bus, with specific device (slot) number and function number.
+
+ By default, a request will receive a “Master abort” completion status and return value of all ‘1’s
+ if the request reaches non-existent device or function. In addition to that, a PCI-E device will
+ generate “Unsupported Request” error message, but that will not be signaled to the root to
+ trigger an interrupt by most default settings.
+
+ The goal is to clear the path for the “Unsupported Request” error message to be successfully
+ forwarded to the root. After making many tests on my hardware from Intel, I found the most
+ effective way to achieve the goal is causing “Unsupported Request” on an end-point device,
+ and letting the upstream PCI-E bridges forward the error messages to root ports, which will then
+ generate the interrupt.
+
+ End-point devices vary a lot in behavior across different hardware. Based on all the tests I
+ made on my available hardware, there are two important factors that affect the behavior of
+ a device when it receives an invalid configuration space access request: the implementation of
+ Role-Based Error Reporting (RBER) and AER.
+
+ I. The device implements neither RBER nor AER.
+ In this case, the invalid configuration space request will be treated as “unsupported
+ request” non-fatal error. By enabling the “unsupported request” report and non-
+ fatal error report bits in device control section, the device will send a non-fatal
+ uncorrectable error message to upstream bridge, “ERR_NONFATAL”.
+
+ II. The device implements AER but not RBER.
+ This case is similar to the first case that the error message can be sent by enabling
+ error reporting bits. Better than the first case, I can change the severity of
+ “unsupported request” in the AER uncorrectable error severity section, and will be
+ able to send “ERR_NONFATAL” and “ERR_FATAL” message to the bridge.
+
+ III. The device implements both RBER and AER.
+ Starting from PCI-E specification ver. 1.1, PCI-E devices are required to implement
+ RBER. In this case, with RBER, the device will “be smart” and change the type of
+ error signaling based on the error detection agent. When the device receives an
+ invalid configuration space access request, the device with RBER will treat the
+ “unsupported request” non-fatal error as a masked “advisory non-fatal” correctable
+ error, in order to avoid disturbing the probing process. With AER support, I can clear
+ the mask for “advisory non-fatal” error on the AER correctable error mask section,
+ to let the device send an “ERR_COR” message. In addition to that, I can avoid the
+ participation of RBER by changing the severity of “unsupported request” error to
+ “FATAL”, and the device will send an “ERR_FATAL” message.
+
+ IV. The device implements RBER but not AER
+ Unfortunately, this is a dead end. Without AER, I cannot change the severity of
+ “unsupported request” to “fatal”, or let the device report a correctable error. The
+ error signaling flow chart in the specification (p. 291 [1]) also implies this result.
+
+ * Some thoughts:
+ All cases in this method rely on a bridge device and a child device,
+ where the child sends the error message, and the bridge forwards it. Technically we should be able
+ to apply the same trick to a bridge device, which could be better because we do not need to
+ deal with the children. However, I ran many tests on my PCI-E root port bridges but none of
+ them shows any sign of generating an error in response to an invalid configuration space request.
+ According to the flowchart (p. 291 [1]) there should be at least a correctable error recorded in
+ AER status, but my devices disagree with that.
+
+
+
+* Background (NIC_flag)
+
+ I can disable the “Bus Master Enable” bit on the bridge, and then change the flag on the
+ downstream NIC device. After that, the bridge will regard all the I/O requests from the device as
+ “unsupported request”. This error is generated on the bridge, so not only does the root port status
+ record the error, but the AER status of the bridge also reflects the error detail.
+
+ * I have not done many tests on this method. The only problem
+ I noticed is that the NIC may become unstable and lose link after performing injection.
+
+* Reference
+ [1]: PCI-Express Base Specification Revision 1.1
+
+$FreeBSD$
\ No newline at end of file
Index: tools/tools/pci/ReadAER.c
===================================================================
--- /dev/null
+++ tools/tools/pci/ReadAER.c
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 2016 Isilon LLC, EMC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+
+
+/*
+ * One AER error record as fetched from the hw.pci.pcib_error_records
+ * sysctl.  main() reads the record straight into this structure, so the
+ * field order and sizes must stay in step with whatever the kernel side
+ * copies out.
+ * NOTE(review): the per-field notes below are inferred from the field
+ * names - confirm against the driver.
+ */
+struct AER_error_data {
+ int error_number;
+ /* Not used by this program; presumably a kernel address, so it must */
+ /* not be dereferenced here - TODO confirm. */
+ const char *dev_name;
+ /* Printed by printThings() as the unit number of the reporting pcib. */
+ int unit;
+ /* The remaining fields are printed as raw values in decimal and hex. */
+ int pcie_cap_status;
+ int unc_err_status;
+ int unc_err_seve;
+ int c_err_status;
+ int AER_ECRC;
+ int root_err_status;
+ int err_src_ID;
+ int c_err_src_ID;
+};
+
+/* Defined after main(); prints every field of one record except dev_name. */
+void printThings(struct AER_error_data error_data);
+
+/*
+ * Print the number of errors recorded by the pcib driver, then fetch and
+ * print one record.
+ *
+ * argv[1] is the index of the record to fetch, or -1 for the last record.
+ * Returns 0 on success and EXIT_FAILURE when a sysctl fails or the
+ * argument is missing or invalid.
+ */
+int main(int argc, char *argv[])
+{
+	struct AER_error_data error_data;
+	int name[4];
+	size_t miblen, size;
+	char *end;
+	long requested;
+	int count, error, indexnum, saved_errno;
+
+	size = sizeof(count);
+	error = sysctlbyname("hw.pci.pcib_error_count", &count, &size, NULL, 0);
+	if (error != 0) {
+		/* printf() may modify errno, so capture it before printing. */
+		saved_errno = errno;
+		printf("Error: %d\n", error);
+		printf("Errno Translate: %s\n", strerror(saved_errno));
+		return EXIT_FAILURE;
+	}
+
+	printf("Total %d error(s).\n", count);
+	printf("Old records may be overwritten, depends on driver's record length limit.\n");
+
+	if (argc != 2) {
+		printf("arg error: expect one argument: index of error to fetch, or -1 to fetch last record\n");
+		return EXIT_FAILURE;
+	}
+
+	/*
+	 * strtol() rather than atoi(): an argument that is not a number,
+	 * or has trailing characters, is rejected instead of being read
+	 * as index 0.
+	 */
+	errno = 0;
+	requested = strtol(argv[1], &end, 10);
+	if (end == argv[1] || *end != '\0' || errno == ERANGE ||
+	    count <= 0 || requested < -1 || requested >= count) {
+		printf("Invalid argument\n");
+		return EXIT_FAILURE;
+	}
+	indexnum = (requested == -1) ? count - 1 : (int)requested;
+
+	/*
+	 * Resolve the node name, keeping the last slot of name[] free for
+	 * the record index that is appended as one extra MIB component.
+	 */
+	miblen = sizeof(name) / sizeof(name[0]) - 1;
+	error = sysctlnametomib("hw.pci.pcib_error_records", name, &miblen);
+	if (error != 0) {
+		saved_errno = errno;
+		printf("Error: %d\n", error);
+		printf("Errno Translate: %s\n", strerror(saved_errno));
+		return EXIT_FAILURE;
+	}
+	name[miblen] = indexnum;
+
+	bzero(&error_data, sizeof(error_data));
+	size = sizeof(error_data);
+
+	error = sysctl(name, (u_int)(miblen + 1), &error_data, &size, NULL, 0);
+	if (error != 0) {
+		saved_errno = errno;
+		printf("Error: %d\n", error);
+		printf("Errno Translate: %s\n", strerror(saved_errno));
+		return EXIT_FAILURE;
+	}
+
+	printThings(error_data);
+	return 0;
+}
+
+/*
+ * Print value in decimal and in upper-case hexadecimal, followed by a
+ * newline.
+ */
+void printDecHex(int value)
+{
+	/*
+	 * %X takes an unsigned int; convert explicitly so values with the
+	 * top bit set are printed as their 32-bit pattern.
+	 */
+	printf(" %d, HEX: %X\n", value, (unsigned int)value);
+}
+
+/*
+ * Print one error record: a header with the error number and the pcib
+ * unit it came from, then each status field on its own line through
+ * printDecHex().
+ */
+void printThings(struct AER_error_data error_data)
+{
+	/* Label/value pairs, in the order they are printed. */
+	const struct {
+		const char *label;
+		int value;
+	} fields[] = {
+		{ "Get: pcie_cap_status:", error_data.pcie_cap_status },
+		{ "Get: unc_err_status:", error_data.unc_err_status },
+		{ "Get: unc_err_seve:", error_data.unc_err_seve },
+		{ "Get: c_err_status:", error_data.c_err_status },
+		{ "Get: AER_ECRC:", error_data.AER_ECRC },
+		{ "Get: root_err_status:", error_data.root_err_status },
+		{ "Get: err_src_ID:", error_data.err_src_ID },
+		{ "Get: c_err_src_ID:", error_data.c_err_src_ID },
+	};
+	unsigned int k;
+
+	printf("-------------------------------------\n");
+	printf("Error NO.%d\n", error_data.error_number);
+	printf("Error Source: pcib%d\n", error_data.unit);
+	printf("-------\n");
+
+	for (k = 0; k < sizeof(fields) / sizeof(fields[0]); k++) {
+		printf("%s", fields[k].label);
+		printDecHex(fields[k].value);
+	}
+
+	printf("------------------------------------\n");
+}
+
+
+

File Metadata

Mime Type
text/plain
Expires
Wed, Jan 21, 9:32 AM (10 h, 37 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27799262
Default Alt Text
D7697.diff (48 KB)

Event Timeline