Changeset View
Standalone View
usr.sbin/bhyve/pci_ahci.c
Show First 20 Lines • Show All 120 Lines • ▼ Show 20 Lines | |||||
#define AHCI_PORT_IDENT 20 + 1 | #define AHCI_PORT_IDENT 20 + 1 | ||||
struct ahci_ioreq { | struct ahci_ioreq { | ||||
struct blockif_req io_req; | struct blockif_req io_req; | ||||
struct ahci_port *io_pr; | struct ahci_port *io_pr; | ||||
STAILQ_ENTRY(ahci_ioreq) io_flist; | STAILQ_ENTRY(ahci_ioreq) io_flist; | ||||
TAILQ_ENTRY(ahci_ioreq) io_blist; | TAILQ_ENTRY(ahci_ioreq) io_blist; | ||||
uint8_t *cfis; | uint8_t *cfis; | ||||
uint8_t *dsm; | |||||
uint32_t len; | uint32_t len; | ||||
uint32_t done; | uint32_t done; | ||||
int slot; | int slot; | ||||
int more; | int more; | ||||
int readop; | int readop; | ||||
}; | }; | ||||
struct ahci_port { | struct ahci_port { | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | struct pci_ahci_softc { | ||||
uint32_t em_ctl; | uint32_t em_ctl; | ||||
uint32_t cap2; | uint32_t cap2; | ||||
uint32_t bohc; | uint32_t bohc; | ||||
uint32_t lintr; | uint32_t lintr; | ||||
struct ahci_port port[MAX_PORTS]; | struct ahci_port port[MAX_PORTS]; | ||||
}; | }; | ||||
#define ahci_ctx(sc) ((sc)->asc_pi->pi_vmctx) | #define ahci_ctx(sc) ((sc)->asc_pi->pi_vmctx) | ||||
static void ahci_handle_next_trim(struct ahci_port *p, int slot, uint8_t *cfis, | |||||
uint8_t *buf, uint32_t len, uint32_t done); | |||||
static void ahci_handle_port(struct ahci_port *p); | static void ahci_handle_port(struct ahci_port *p); | ||||
static inline void lba_to_msf(uint8_t *buf, int lba) | static inline void lba_to_msf(uint8_t *buf, int lba) | ||||
{ | { | ||||
lba += 150; | lba += 150; | ||||
buf[0] = (lba / 75) / 60; | buf[0] = (lba / 75) / 60; | ||||
buf[1] = (lba / 75) % 60; | buf[1] = (lba / 75) % 60; | ||||
buf[2] = lba % 75; | buf[2] = lba % 75; | ||||
▲ Show 20 Lines • Show All 583 Lines • ▼ Show 20 Lines | for (i = 0; i < hdr->prdtl && len; i++) { | ||||
len -= sublen; | len -= sublen; | ||||
to += sublen; | to += sublen; | ||||
prdt++; | prdt++; | ||||
} | } | ||||
return (size - len); | return (size - len); | ||||
} | } | ||||
static void | static void | ||||
ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) | ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis) | ||||
{ | { | ||||
struct ahci_ioreq *aior; | uint32_t len; | ||||
struct blockif_req *breq; | int ncq; | ||||
uint8_t *entry; | uint8_t *buf; | ||||
uint64_t elba; | unsigned int nread; | ||||
uint32_t len, elen; | |||||
int err, first, ncq; | |||||
uint8_t buf[512]; | |||||
unsigned int written; | |||||
first = (done == 0); | buf = NULL; | ||||
if (cfis[2] == ATA_DATA_SET_MANAGEMENT) { | if (cfis[2] == ATA_DATA_SET_MANAGEMENT) { | ||||
len = (uint16_t)cfis[13] << 8 | cfis[12]; | len = (uint16_t)cfis[13] << 8 | cfis[12]; | ||||
len *= 512; | len *= 512; | ||||
ncq = 0; | ncq = 0; | ||||
} else { /* ATA_SEND_FPDMA_QUEUED */ | } else { /* ATA_SEND_FPDMA_QUEUED */ | ||||
len = (uint16_t)cfis[11] << 8 | cfis[3]; | len = (uint16_t)cfis[11] << 8 | cfis[3]; | ||||
len *= 512; | len *= 512; | ||||
ncq = 1; | ncq = 1; | ||||
} | } | ||||
written = read_prdt(p, slot, cfis, buf, sizeof(buf)); | |||||
memset(buf + written, 0, sizeof(buf) - written); | |||||
next: | /* Support for only a single block is advertised via IDENTIFY. */ | ||||
if (done >= sizeof(buf) - 8) | if (len > 512) { | ||||
goto invalid_command; | |||||
} | |||||
buf = malloc(len); | |||||
nread = read_prdt(p, slot, cfis, buf, len); | |||||
if (nread != len) { | |||||
goto invalid_command; | |||||
} | |||||
ahci_handle_next_trim(p, slot, cfis, buf, len, 0); | |||||
return; | return; | ||||
invalid_command: | |||||
free(buf); | |||||
if (ncq) { | |||||
ahci_write_fis_d2h_ncq(p, slot); | |||||
ahci_write_fis_sdb(p, slot, cfis, | |||||
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); | |||||
} else { | |||||
ahci_write_fis_d2h(p, slot, cfis, | |||||
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); | |||||
} | |||||
} | |||||
static void | |||||
ahci_handle_next_trim(struct ahci_port *p, int slot, uint8_t *cfis, | |||||
uint8_t *buf, uint32_t len, uint32_t done) | |||||
{ | |||||
struct ahci_ioreq *aior; | |||||
struct blockif_req *breq; | |||||
uint8_t *entry; | |||||
uint64_t elba; | |||||
uint32_t elen; | |||||
int err; | |||||
bool first, ncq; | |||||
first = (done == 0); | |||||
if (cfis[2] == ATA_DATA_SET_MANAGEMENT) { | |||||
ncq = false; | |||||
} else { /* ATA_SEND_FPDMA_QUEUED */ | |||||
ncq = true; | |||||
emaste: Maybe a KASSERT to document that it must be `ATA_SEND_FPDMA_QUEUED`? | |||||
Done Inline Actions: Such an assertion can fail if the guest modifies the CFIS while the command is in progress. If we care about those races then we need a separate change to read and cache the CFIS at the start of command processing and free it after the command completes. Note that if the ncq flag is "wrong" we don't crash; we just write a different result into the FIS. This might confuse the guest, but it shouldn't impact the hypervisor. jhb: Such an assertion can fail if the guest modifies the CFIS while the command is in progress. If… | |||||
Not Done Inline Actions: Would `else if (cfis[2] == ATA_SEND_FPDMA_QUEUED)` make sense? emaste: Would `else if (cfis[2] == ATA_SEND_FPDMA_QUEUED)` make sense? | |||||
Done Inline Actions: But then what do you do in the third case? Especially given that this is in the continuation phase where we have already emitted at least one trim. Also, there are many other places that read the CFIS multiple times in this device model. If we do care about such races, we will need to cache the CFIS instead of fixing all these places to fail with errors if the CFIS changed. jhb: But then what do you do in the third case? Especially given that this is in the continuation… | |||||
Not Done Inline Actions: Since there are only 32 CFISes, and since they are small, it would be better to allocate them into a slot (like real hardware does) and pass that around instead of guest memory. It would be a better emulation of the DMA that's done, since the drive sees only one version of the CFIS, and it's undefined what happens if you change the CFIS after submitting the command. I'd also be tempted to say `ncq = (cfis[2] == ATA_SEND_FPDMA_QUEUED)` instead, so we only do ncq completion processing on the relatively rare ncq trim command (though we could avoid this whole mess by not advertising ncq trim support, but that would pessimize some applications that don't want to pay the queueing penalty on latency, and the avoided mess is small). imp: Since there are only 32 CFISes, and since they are small, it would be better to allocate them into… | |||||
Done Inline Actions: I'm happy to fix the model to cache the CFIS; that's just an orthogonal change and isn't TRIM specific. The main thing is I didn't read the SATA (or is it ATA? I had to look at three different specs to try to understand AHCI) spec closely enough to determine what the upper bound on the CFIS size is. We can easily malloc a copy of it that we pass around, though we also need the original address still so that code can read the PRDT for commands that use it. Currently they just read from `cfis + 0x80`. jhb: I'm happy to fix the model to cache the CFIS; that's just an orthogonal change and isn't TRIM… | |||||
} | |||||
/* Find the next range to TRIM. */ | |||||
while (done < len) { | |||||
entry = &buf[done]; | entry = &buf[done]; | ||||
elba = ((uint64_t)entry[5] << 40) | | elba = ((uint64_t)entry[5] << 40) | | ||||
((uint64_t)entry[4] << 32) | | ((uint64_t)entry[4] << 32) | | ||||
((uint64_t)entry[3] << 24) | | ((uint64_t)entry[3] << 24) | | ||||
((uint64_t)entry[2] << 16) | | ((uint64_t)entry[2] << 16) | | ||||
((uint64_t)entry[1] << 8) | | ((uint64_t)entry[1] << 8) | | ||||
entry[0]; | entry[0]; | ||||
elen = (uint16_t)entry[7] << 8 | entry[6]; | elen = (uint16_t)entry[7] << 8 | entry[6]; | ||||
done += 8; | done += 8; | ||||
if (elen == 0) { | if (elen != 0) | ||||
if (done >= len) { | break; | ||||
} | |||||
/* All remaining ranges were empty. */ | |||||
if (done == len) { | |||||
free(buf); | |||||
if (ncq) { | if (ncq) { | ||||
if (first) | if (first) | ||||
ahci_write_fis_d2h_ncq(p, slot); | ahci_write_fis_d2h_ncq(p, slot); | ||||
ahci_write_fis_sdb(p, slot, cfis, | ahci_write_fis_sdb(p, slot, cfis, | ||||
ATA_S_READY | ATA_S_DSC); | ATA_S_READY | ATA_S_DSC); | ||||
} else { | } else { | ||||
ahci_write_fis_d2h(p, slot, cfis, | ahci_write_fis_d2h(p, slot, cfis, | ||||
ATA_S_READY | ATA_S_DSC); | ATA_S_READY | ATA_S_DSC); | ||||
} | } | ||||
if (!first) { | |||||
p->pending &= ~(1 << slot); | p->pending &= ~(1 << slot); | ||||
ahci_check_stopped(p); | ahci_check_stopped(p); | ||||
if (!first) | |||||
Done Inline Actions: This being conditional in the old code did not make sense to me. I suspect it was a bug in the old code (not related to the SA), but one you would only hit if you had a TRIM buffer that was completely empty (all lengths zero). jhb: This being conditional in the old code did not make sense to me. I suspect it was a bug in the… | |||||
Done Inline Actions: I don't remember what I was thinking back then, but looking at it now, it seems to break the recursion of ahci_handle_port() -> ahci_handle_slot() -> ahci_handle_cmd() -> ahci_handle_dsm_trim() -> ahci_handle_port(). mav: I don't remember what I was thinking back then, but looking at it now, it seems to break… | |||||
Done Inline Actions: Hmmm, ok. So I should put it back then, I guess. jhb: Hmmm, ok. So I should put it back then, I guess. | |||||
ahci_handle_port(p); | ahci_handle_port(p); | ||||
} | |||||
return; | return; | ||||
} | } | ||||
goto next; | |||||
} | |||||
/* | /* | ||||
* Pull request off free list | * Pull request off free list | ||||
*/ | */ | ||||
aior = STAILQ_FIRST(&p->iofhd); | aior = STAILQ_FIRST(&p->iofhd); | ||||
assert(aior != NULL); | assert(aior != NULL); | ||||
STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); | STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); | ||||
aior->cfis = cfis; | aior->cfis = cfis; | ||||
aior->slot = slot; | aior->slot = slot; | ||||
aior->len = len; | aior->len = len; | ||||
aior->done = done; | aior->done = done; | ||||
aior->dsm = buf; | |||||
aior->more = (len != done); | aior->more = (len != done); | ||||
breq = &aior->io_req; | breq = &aior->io_req; | ||||
breq->br_offset = elba * blockif_sectsz(p->bctx); | breq->br_offset = elba * blockif_sectsz(p->bctx); | ||||
breq->br_resid = elen * blockif_sectsz(p->bctx); | breq->br_resid = elen * blockif_sectsz(p->bctx); | ||||
/* | /* | ||||
* Mark this command in-flight. | * Mark this command in-flight. | ||||
*/ | */ | ||||
p->pending |= 1 << slot; | p->pending |= 1 << slot; | ||||
Done Inline Actions: Does that mean I should not call this here? This is always "first". jhb: Does that mean I should not call this here? This is always "first". | |||||
Done Inline Actions: I think so. And not only ahci_handle_port(), but I suppose the previous two lines also, since the command was never marked pending. mav: I think so. And not only ahci_handle_port(), but I suppose the previous two lines also, since the… | |||||
/* | /* | ||||
* Stuff request onto busy list | * Stuff request onto busy list | ||||
*/ | */ | ||||
TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); | TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); | ||||
if (ncq && first) | if (ncq && first) | ||||
ahci_write_fis_d2h_ncq(p, slot); | ahci_write_fis_d2h_ncq(p, slot); | ||||
▲ Show 20 Lines • Show All 851 Lines • ▼ Show 20 Lines | case ATA_WRITE_FPDMA_QUEUED: | ||||
break; | break; | ||||
case ATA_FLUSHCACHE: | case ATA_FLUSHCACHE: | ||||
case ATA_FLUSHCACHE48: | case ATA_FLUSHCACHE48: | ||||
ahci_handle_flush(p, slot, cfis); | ahci_handle_flush(p, slot, cfis); | ||||
break; | break; | ||||
case ATA_DATA_SET_MANAGEMENT: | case ATA_DATA_SET_MANAGEMENT: | ||||
if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM && | if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM && | ||||
cfis[13] == 0 && cfis[12] == 1) { | cfis[13] == 0 && cfis[12] == 1) { | ||||
ahci_handle_dsm_trim(p, slot, cfis, 0); | ahci_handle_dsm_trim(p, slot, cfis); | ||||
break; | break; | ||||
} | } | ||||
ahci_write_fis_d2h(p, slot, cfis, | ahci_write_fis_d2h(p, slot, cfis, | ||||
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); | (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); | ||||
break; | break; | ||||
case ATA_SEND_FPDMA_QUEUED: | case ATA_SEND_FPDMA_QUEUED: | ||||
if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM && | if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM && | ||||
cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM && | cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM && | ||||
cfis[11] == 0 && cfis[3] == 1) { | cfis[11] == 0 && cfis[3] == 1) { | ||||
ahci_handle_dsm_trim(p, slot, cfis, 0); | ahci_handle_dsm_trim(p, slot, cfis); | ||||
break; | break; | ||||
} | } | ||||
ahci_write_fis_d2h(p, slot, cfis, | ahci_write_fis_d2h(p, slot, cfis, | ||||
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); | (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); | ||||
break; | break; | ||||
case ATA_READ_LOG_EXT: | case ATA_READ_LOG_EXT: | ||||
case ATA_READ_LOG_DMA_EXT: | case ATA_READ_LOG_DMA_EXT: | ||||
ahci_handle_read_log(p, slot, cfis); | ahci_handle_read_log(p, slot, cfis); | ||||
▲ Show 20 Lines • Show All 121 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
ata_ioreq_cb(struct blockif_req *br, int err) | ata_ioreq_cb(struct blockif_req *br, int err) | ||||
{ | { | ||||
struct ahci_cmd_hdr *hdr; | struct ahci_cmd_hdr *hdr; | ||||
struct ahci_ioreq *aior; | struct ahci_ioreq *aior; | ||||
struct ahci_port *p; | struct ahci_port *p; | ||||
struct pci_ahci_softc *sc; | struct pci_ahci_softc *sc; | ||||
uint32_t tfd; | uint32_t tfd; | ||||
uint8_t *cfis; | uint8_t *cfis, *dsm; | ||||
int slot, ncq, dsm; | int slot, ncq; | ||||
DPRINTF("%s %d", __func__, err); | DPRINTF("%s %d", __func__, err); | ||||
ncq = dsm = 0; | ncq = 0; | ||||
aior = br->br_param; | aior = br->br_param; | ||||
p = aior->io_pr; | p = aior->io_pr; | ||||
cfis = aior->cfis; | cfis = aior->cfis; | ||||
slot = aior->slot; | slot = aior->slot; | ||||
sc = p->pr_sc; | sc = p->pr_sc; | ||||
hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); | hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); | ||||
if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || | if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || | ||||
cfis[2] == ATA_READ_FPDMA_QUEUED || | cfis[2] == ATA_READ_FPDMA_QUEUED || | ||||
cfis[2] == ATA_SEND_FPDMA_QUEUED) | cfis[2] == ATA_SEND_FPDMA_QUEUED) | ||||
ncq = 1; | ncq = 1; | ||||
if (cfis[2] == ATA_DATA_SET_MANAGEMENT || | dsm = aior->dsm; | ||||
(cfis[2] == ATA_SEND_FPDMA_QUEUED && | aior->dsm = NULL; | ||||
(cfis[13] & 0x1f) == ATA_SFPDMA_DSM)) | |||||
dsm = 1; | |||||
pthread_mutex_lock(&sc->mtx); | pthread_mutex_lock(&sc->mtx); | ||||
/* | /* | ||||
* Delete the blockif request from the busy list | * Delete the blockif request from the busy list | ||||
*/ | */ | ||||
TAILQ_REMOVE(&p->iobhd, aior, io_blist); | TAILQ_REMOVE(&p->iobhd, aior, io_blist); | ||||
/* | /* | ||||
* Move the blockif request back to the free list | * Move the blockif request back to the free list | ||||
*/ | */ | ||||
STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); | STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); | ||||
if (!err) | if (!err) | ||||
hdr->prdbc = aior->done; | hdr->prdbc = aior->done; | ||||
if (!err && aior->more) { | if (!err && aior->more) { | ||||
if (dsm) | if (dsm != NULL) | ||||
ahci_handle_dsm_trim(p, slot, cfis, aior->done); | ahci_handle_next_trim(p, slot, cfis, dsm, | ||||
aior->len, aior->done); | |||||
else | else | ||||
ahci_handle_rw(p, slot, cfis, aior->done); | ahci_handle_rw(p, slot, cfis, aior->done); | ||||
goto out; | goto out; | ||||
} | } | ||||
if (!err) | if (!err) | ||||
tfd = ATA_S_READY | ATA_S_DSC; | tfd = ATA_S_READY | ATA_S_DSC; | ||||
else | else | ||||
tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR; | tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR; | ||||
if (ncq) | if (ncq) | ||||
ahci_write_fis_sdb(p, slot, cfis, tfd); | ahci_write_fis_sdb(p, slot, cfis, tfd); | ||||
else | else | ||||
ahci_write_fis_d2h(p, slot, cfis, tfd); | ahci_write_fis_d2h(p, slot, cfis, tfd); | ||||
/* | /* | ||||
* This command is now complete. | * This command is now complete. | ||||
*/ | */ | ||||
p->pending &= ~(1 << slot); | p->pending &= ~(1 << slot); | ||||
ahci_check_stopped(p); | ahci_check_stopped(p); | ||||
ahci_handle_port(p); | ahci_handle_port(p); | ||||
free(dsm); | |||||
out: | out: | ||||
pthread_mutex_unlock(&sc->mtx); | pthread_mutex_unlock(&sc->mtx); | ||||
DPRINTF("%s exit", __func__); | DPRINTF("%s exit", __func__); | ||||
} | } | ||||
static void | static void | ||||
atapi_ioreq_cb(struct blockif_req *br, int err) | atapi_ioreq_cb(struct blockif_req *br, int err) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 770 Lines • Show Last 20 Lines |
Maybe a KASSERT to document that it must be `ATA_SEND_FPDMA_QUEUED`?