Changeset View
Changeset View
Standalone View
Standalone View
sys/dev/ioat/ioat_internal.h
Show First 20 Lines • Show All 420 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Deprecated OPs -- v3 DMA generates an abort if given these. And this driver | * Deprecated OPs -- v3 DMA generates an abort if given these. And this driver | ||||
* doesn't support anything older than v3. | * doesn't support anything older than v3. | ||||
*/ | */ | ||||
#define IOAT_OP_OLD_XOR 0x85 | #define IOAT_OP_OLD_XOR 0x85 | ||||
#define IOAT_OP_OLD_XOR_VAL 0x86 | #define IOAT_OP_OLD_XOR_VAL 0x86 | ||||
enum ioat_ref_kind { | |||||
IOAT_DMAENGINE_REF = 0, | |||||
IOAT_ACTIVE_DESCR_REF, | |||||
IOAT_NUM_REF_KINDS | |||||
}; | |||||
/* One of these per allocated PCI device. */ | /* One of these per allocated PCI device. */ | ||||
struct ioat_softc { | struct ioat_softc { | ||||
bus_dmaengine_t dmaengine; | bus_dmaengine_t dmaengine; | ||||
#define to_ioat_softc(_dmaeng) \ | #define to_ioat_softc(_dmaeng) \ | ||||
({ \ | ({ \ | ||||
bus_dmaengine_t *_p = (_dmaeng); \ | bus_dmaengine_t *_p = (_dmaeng); \ | ||||
(struct ioat_softc *)((char *)_p - \ | (struct ioat_softc *)((char *)_p - \ | ||||
offsetof(struct ioat_softc, dmaengine)); \ | offsetof(struct ioat_softc, dmaengine)); \ | ||||
}) | }) | ||||
device_t device; | |||||
int version; | int version; | ||||
unsigned chan_idx; | unsigned chan_idx; | ||||
struct mtx submit_lock; | |||||
device_t device; | |||||
bus_space_tag_t pci_bus_tag; | bus_space_tag_t pci_bus_tag; | ||||
bus_space_handle_t pci_bus_handle; | bus_space_handle_t pci_bus_handle; | ||||
int pci_resource_id; | |||||
struct resource *pci_resource; | struct resource *pci_resource; | ||||
int pci_resource_id; | |||||
uint32_t max_xfer_size; | uint32_t max_xfer_size; | ||||
uint32_t capabilities; | uint32_t capabilities; | ||||
uint32_t ring_size_order; | |||||
uint16_t intrdelay_max; | uint16_t intrdelay_max; | ||||
uint16_t cached_intrdelay; | uint16_t cached_intrdelay; | ||||
struct resource *res; | |||||
int rid; | int rid; | ||||
struct resource *res; | |||||
void *tag; | void *tag; | ||||
bus_dma_tag_t hw_desc_tag; | bus_dma_tag_t hw_desc_tag; | ||||
bus_dmamap_t hw_desc_map; | bus_dmamap_t hw_desc_map; | ||||
bus_dma_tag_t comp_update_tag; | bus_dma_tag_t comp_update_tag; | ||||
bus_dmamap_t comp_update_map; | bus_dmamap_t comp_update_map; | ||||
uint64_t *comp_update; | uint64_t *comp_update; | ||||
bus_addr_t comp_update_bus_addr; | bus_addr_t comp_update_bus_addr; | ||||
struct callout poll_timer; | |||||
struct callout shrink_timer; | |||||
struct task reset_task; | |||||
boolean_t quiescing; | boolean_t quiescing; | ||||
boolean_t destroying; | boolean_t destroying; | ||||
boolean_t is_submitter_processing; | boolean_t is_submitter_processing; | ||||
boolean_t is_completion_pending; /* submit_lock */ | |||||
boolean_t is_reset_pending; | |||||
boolean_t is_channel_running; | |||||
boolean_t intrdelay_supported; | boolean_t intrdelay_supported; | ||||
boolean_t resetting; /* submit_lock */ | boolean_t resetting; /* submit_lock */ | ||||
boolean_t resetting_cleanup; /* cleanup_lock */ | boolean_t resetting_cleanup; /* cleanup_lock */ | ||||
uint32_t head; | |||||
uint32_t acq_head; | |||||
uint32_t tail; | |||||
uint32_t hw_head; | |||||
uint32_t ring_size_order; | |||||
bus_addr_t last_seen; | |||||
struct ioat_descriptor *ring; | struct ioat_descriptor *ring; | ||||
union ioat_hw_descriptor { | union ioat_hw_descriptor { | ||||
struct ioat_generic_hw_descriptor generic; | struct ioat_generic_hw_descriptor generic; | ||||
struct ioat_dma_hw_descriptor dma; | struct ioat_dma_hw_descriptor dma; | ||||
struct ioat_fill_hw_descriptor fill; | struct ioat_fill_hw_descriptor fill; | ||||
struct ioat_crc32_hw_descriptor crc32; | struct ioat_crc32_hw_descriptor crc32; | ||||
struct ioat_xor_hw_descriptor xor; | struct ioat_xor_hw_descriptor xor; | ||||
struct ioat_xor_ext_hw_descriptor xor_ext; | struct ioat_xor_ext_hw_descriptor xor_ext; | ||||
struct ioat_pq_hw_descriptor pq; | struct ioat_pq_hw_descriptor pq; | ||||
struct ioat_pq_ext_hw_descriptor pq_ext; | struct ioat_pq_ext_hw_descriptor pq_ext; | ||||
struct ioat_raw_hw_descriptor raw; | struct ioat_raw_hw_descriptor raw; | ||||
} *hw_desc_ring; | } *hw_desc_ring; | ||||
bus_addr_t hw_desc_bus_addr; | bus_addr_t hw_desc_bus_addr; | ||||
#define RING_PHYS_ADDR(sc, i) (sc)->hw_desc_bus_addr + \ | #define RING_PHYS_ADDR(sc, i) (sc)->hw_desc_bus_addr + \ | ||||
(((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor)) | (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor)) | ||||
struct mtx_padalign submit_lock; | |||||
struct mtx cleanup_lock; | struct mtx cleanup_lock; | ||||
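cem: What is the motivation for placing the cleanup lock adjacent to the submit lock? Doesn't that somewhat defeat the point of having multiple locks? It's also unclear to me why the members below were reordered. | |||||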
Done Inline Actions mav: Using mtx_padalign instead of mtx puts the two locks on different cache lines, so there is no conflict. My motivation for the reorder was to move all the (mostly) static elements into cache lines separate from the heavily contended ones. I was also thinking about separating the fields below, but unfortunately both submission and completion access both head and tail. | |||||
Not Done Inline Actions cem: At least some amd64 models will prefetch adjacent cache lines. Might it make more sense to put another cache line in between the two lock lines? We have plenty of filler material in the earlier part of the softc. I don't feel strongly about this; it's fine as you have it. | |||||
volatile uint32_t refcnt; | |||||
#ifdef INVARIANTS | struct task reset_task; | ||||
volatile uint32_t refkinds[IOAT_NUM_REF_KINDS]; | struct callout poll_timer; | ||||
#endif | |||||
uint32_t refcnt; | |||||
uint32_t head; | |||||
uint32_t acq_head; | |||||
uint32_t tail; | |||||
bus_addr_t last_seen; | |||||
struct { | struct { | ||||
uint64_t interrupts; | uint64_t interrupts; | ||||
uint64_t descriptors_processed; | uint64_t descriptors_processed; | ||||
uint64_t descriptors_error; | uint64_t descriptors_error; | ||||
uint64_t descriptors_submitted; | uint64_t descriptors_submitted; | ||||
uint32_t channel_halts; | uint32_t channel_halts; | ||||
▲ Show 20 Lines • Show All 95 Lines • Show Last 20 Lines |
What is the motivation for placing the cleanup lock adjacent to the submit lock? Doesn't that somewhat defeat the point of having multiple locks? It's also unclear to me why the members below were reordered.