Index: head/sbin/nvmecontrol/devlist.c =================================================================== --- head/sbin/nvmecontrol/devlist.c +++ head/sbin/nvmecontrol/devlist.c @@ -54,8 +54,14 @@ static inline uint32_t ns_get_sector_size(struct nvme_namespace_data *nsdata) { + uint8_t flbas_fmt, lbads; - return (1 << nsdata->lbaf[nsdata->flbas.format].lbads); + flbas_fmt = (nsdata->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; + lbads = (nsdata->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & + NVME_NS_DATA_LBAF_LBADS_MASK; + + return (1 << lbads); } void Index: head/sbin/nvmecontrol/firmware.c =================================================================== --- head/sbin/nvmecontrol/firmware.c +++ head/sbin/nvmecontrol/firmware.c @@ -125,9 +125,9 @@ memcpy(chunk, payload + off, size); memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; - pt.cmd.cdw10 = (size / sizeof(uint32_t)) - 1; - pt.cmd.cdw11 = (off / sizeof(uint32_t)); + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD); + pt.cmd.cdw10 = htole32((size / sizeof(uint32_t)) - 1); + pt.cmd.cdw11 = htole32(off / sizeof(uint32_t)); pt.buf = chunk; pt.len = size; pt.is_read = 0; @@ -147,17 +147,21 @@ activate_firmware(int fd, int slot, int activate_action) { struct nvme_pt_command pt; + uint16_t sct, sc; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_FIRMWARE_ACTIVATE; - pt.cmd.cdw10 = (activate_action << 3) | slot; + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_FIRMWARE_ACTIVATE); + pt.cmd.cdw10 = htole32((activate_action << 3) | slot); pt.is_read = 0; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "firmware activate request failed"); - if (pt.cpl.status.sct == NVME_SCT_COMMAND_SPECIFIC && - pt.cpl.status.sc == NVME_SC_FIRMWARE_REQUIRES_RESET) + sct = NVME_STATUS_GET_SCT(pt.cpl.status); + sc = NVME_STATUS_GET_SC(pt.cpl.status); + + if (sct == NVME_SCT_COMMAND_SPECIFIC && + sc == NVME_SC_FIRMWARE_REQUIRES_RESET) return 1; 
if (nvme_completion_is_error(&pt.cpl)) @@ -180,16 +184,19 @@ int fd = -1, slot = 0; int a_flag, s_flag, f_flag; int activate_action, reboot_required; - char ch, *p, *image = NULL; + int opt; + char *p, *image = NULL; char *controller = NULL, prompt[64]; void *buf = NULL; int32_t size = 0; + uint16_t oacs_fw; + uint8_t fw_slot1_ro, fw_num_slots; struct nvme_controller_data cdata; a_flag = s_flag = f_flag = false; - while ((ch = getopt(argc, argv, "af:s:")) != -1) { - switch (ch) { + while ((opt = getopt(argc, argv, "af:s:")) != -1) { + switch (opt) { case 'a': a_flag = true; break; @@ -243,17 +250,26 @@ open_dev(controller, &fd, 1, 1); read_controller_data(fd, &cdata); - if (cdata.oacs.firmware == 0) + oacs_fw = (cdata.oacs >> NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT) & + NVME_CTRLR_DATA_OACS_FIRMWARE_MASK; + + if (oacs_fw == 0) errx(1, "controller does not support firmware activate/download"); - if (f_flag && slot == 1 && cdata.frmw.slot1_ro) + fw_slot1_ro = (cdata.frmw >> NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT) & + NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK; + + if (f_flag && slot == 1 && fw_slot1_ro) errx(1, "slot %d is marked as read only", slot); - if (slot > cdata.frmw.num_slots) + fw_num_slots = (cdata.frmw >> NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT) & + NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK; + + if (slot > fw_num_slots) errx(1, "slot %d specified but controller only supports %d slots", - slot, cdata.frmw.num_slots); + slot, fw_num_slots); if (a_flag && !f_flag && !slot_has_valid_firmware(fd, slot)) errx(1, Index: head/sbin/nvmecontrol/identify.c =================================================================== --- head/sbin/nvmecontrol/identify.c +++ head/sbin/nvmecontrol/identify.c @@ -47,7 +47,52 @@ { uint8_t str[128]; char cbuf[UINT128_DIG + 1]; + uint16_t oncs, oacs; + uint8_t compare, write_unc, dsm, vwc_present; + uint8_t security, fmt, fw, nsmgmt; + uint8_t fw_slot1_ro, fw_num_slots; + uint8_t ns_smart; + uint8_t sqes_max, sqes_min; + uint8_t cqes_max, cqes_min; + oncs 
= cdata->oncs; + compare = (oncs >> NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT) & + NVME_CTRLR_DATA_ONCS_COMPARE_MASK; + write_unc = (oncs >> NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT) & + NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK; + dsm = (oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) & + NVME_CTRLR_DATA_ONCS_DSM_MASK; + vwc_present = (cdata->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) & + NVME_CTRLR_DATA_VWC_PRESENT_MASK; + + oacs = cdata->oacs; + security = (oacs >> NVME_CTRLR_DATA_OACS_SECURITY_SHIFT) & + NVME_CTRLR_DATA_OACS_SECURITY_MASK; + fmt = (oacs >> NVME_CTRLR_DATA_OACS_FORMAT_SHIFT) & + NVME_CTRLR_DATA_OACS_FORMAT_MASK; + fw = (oacs >> NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT) & + NVME_CTRLR_DATA_OACS_FIRMWARE_MASK; + nsmgmt = (oacs >> NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT) & + NVME_CTRLR_DATA_OACS_NSMGMT_MASK; + + fw_num_slots = (cdata->frmw >> NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT) & + NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK; + fw_slot1_ro = (cdata->frmw >> NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT) & + NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK; + + ns_smart = (cdata->lpa >> NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT) & + NVME_CTRLR_DATA_LPA_NS_SMART_MASK; + + sqes_min = (cdata->sqes >> NVME_CTRLR_DATA_SQES_MIN_SHIFT) & + NVME_CTRLR_DATA_SQES_MIN_MASK; + sqes_max = (cdata->sqes >> NVME_CTRLR_DATA_SQES_MAX_SHIFT) & + NVME_CTRLR_DATA_SQES_MAX_MASK; + + cqes_min = (cdata->cqes >> NVME_CTRLR_DATA_CQES_MIN_SHIFT) & + NVME_CTRLR_DATA_CQES_MIN_MASK; + cqes_max = (cdata->cqes >> NVME_CTRLR_DATA_CQES_MAX_SHIFT) & + NVME_CTRLR_DATA_CQES_MAX_MASK; + printf("Controller Capabilities/Features\n"); printf("================================\n"); printf("Vendor ID: %04x\n", cdata->vid); @@ -67,34 +112,34 @@ if (cdata->mdts == 0) printf("Unlimited\n"); else - printf("%d\n", PAGE_SIZE * (1 << cdata->mdts)); + printf("%ld\n", PAGE_SIZE * (1 << cdata->mdts)); printf("Controller ID: 0x%02x\n", cdata->ctrlr_id); printf("\n"); printf("Admin Command Set Attributes\n"); printf("============================\n"); printf("Security 
Send/Receive: %s\n", - cdata->oacs.security ? "Supported" : "Not Supported"); + security ? "Supported" : "Not Supported"); printf("Format NVM: %s\n", - cdata->oacs.format ? "Supported" : "Not Supported"); + fmt ? "Supported" : "Not Supported"); printf("Firmware Activate/Download: %s\n", - cdata->oacs.firmware ? "Supported" : "Not Supported"); + fw ? "Supported" : "Not Supported"); printf("Namespace Managment: %s\n", - cdata->oacs.nsmgmt ? "Supported" : "Not Supported"); + nsmgmt ? "Supported" : "Not Supported"); printf("Abort Command Limit: %d\n", cdata->acl+1); printf("Async Event Request Limit: %d\n", cdata->aerl+1); printf("Number of Firmware Slots: "); - if (cdata->oacs.firmware != 0) - printf("%d\n", cdata->frmw.num_slots); + if (fw != 0) + printf("%d\n", fw_num_slots); else printf("N/A\n"); printf("Firmware Slot 1 Read-Only: "); - if (cdata->oacs.firmware != 0) - printf("%s\n", cdata->frmw.slot1_ro ? "Yes" : "No"); + if (fw != 0) + printf("%s\n", fw_slot1_ro ? "Yes" : "No"); else printf("N/A\n"); printf("Per-Namespace SMART Log: %s\n", - cdata->lpa.ns_smart ? "Yes" : "No"); + ns_smart ? "Yes" : "No"); printf("Error Log Page Entries: %d\n", cdata->elpe+1); printf("Number of Power States: %d\n", cdata->npss+1); @@ -102,22 +147,22 @@ printf("NVM Command Set Attributes\n"); printf("==========================\n"); printf("Submission Queue Entry Size\n"); - printf(" Max: %d\n", 1 << cdata->sqes.max); - printf(" Min: %d\n", 1 << cdata->sqes.min); + printf(" Max: %d\n", 1 << sqes_max); + printf(" Min: %d\n", 1 << sqes_min); printf("Completion Queue Entry Size\n"); - printf(" Max: %d\n", 1 << cdata->cqes.max); - printf(" Min: %d\n", 1 << cdata->cqes.min); + printf(" Max: %d\n", 1 << cqes_max); + printf(" Min: %d\n", 1 << cqes_min); printf("Number of Namespaces: %d\n", cdata->nn); printf("Compare Command: %s\n", - cdata->oncs.compare ? "Supported" : "Not Supported"); + compare ? 
"Supported" : "Not Supported"); printf("Write Uncorrectable Command: %s\n", - cdata->oncs.write_unc ? "Supported" : "Not Supported"); + write_unc ? "Supported" : "Not Supported"); printf("Dataset Management Command: %s\n", - cdata->oncs.dsm ? "Supported" : "Not Supported"); + dsm ? "Supported" : "Not Supported"); printf("Volatile Write Cache: %s\n", - cdata->vwc.present ? "Present" : "Not Present"); + vwc_present ? "Present" : "Not Present"); - if (cdata->oacs.nsmgmt) { + if (nsmgmt) { printf("\n"); printf("Namespace Drive Attributes\n"); printf("==========================\n"); @@ -132,7 +177,16 @@ print_namespace(struct nvme_namespace_data *nsdata) { uint32_t i; + uint32_t lbaf, lbads, ms; + uint8_t thin_prov; + uint8_t flbas_fmt; + thin_prov = (nsdata->nsfeat >> NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT) & + NVME_NS_DATA_NSFEAT_THIN_PROV_MASK; + + flbas_fmt = (nsdata->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; + printf("Size (in LBAs): %lld (%lldM)\n", (long long)nsdata->nsze, (long long)nsdata->nsze / 1024 / 1024); @@ -143,13 +197,18 @@ (long long)nsdata->nuse, (long long)nsdata->nuse / 1024 / 1024); printf("Thin Provisioning: %s\n", - nsdata->nsfeat.thin_prov ? "Supported" : "Not Supported"); + thin_prov ? 
"Supported" : "Not Supported"); printf("Number of LBA Formats: %d\n", nsdata->nlbaf+1); - printf("Current LBA Format: LBA Format #%02d\n", - nsdata->flbas.format); - for (i = 0; i <= nsdata->nlbaf; i++) + printf("Current LBA Format: LBA Format #%02d\n", flbas_fmt); + for (i = 0; i <= nsdata->nlbaf; i++) { + lbaf = nsdata->lbaf[i]; + lbads = (lbaf >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & + NVME_NS_DATA_LBAF_LBADS_MASK; + ms = (lbaf >> NVME_NS_DATA_LBAF_MS_SHIFT) & + NVME_NS_DATA_LBAF_MS_MASK; printf("LBA Format #%02d: Data Size: %5d Metadata Size: %5d\n", - i, 1 << nsdata->lbaf[i].lbads, nsdata->lbaf[i].ms); + i, 1 << lbads, ms); + } } static void Index: head/sbin/nvmecontrol/logpage.c =================================================================== --- head/sbin/nvmecontrol/logpage.c +++ head/sbin/nvmecontrol/logpage.c @@ -46,10 +46,6 @@ #include #include -#if _BYTE_ORDER != _LITTLE_ENDIAN -#error "Code only works on little endian machines" -#endif - #include "nvmecontrol.h" #define DEFAULT_SIZE (4096) @@ -107,12 +103,15 @@ uint32_t payload_size) { struct nvme_pt_command pt; + struct nvme_error_information_entry *err_entry; + int i, err_pages; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_GET_LOG_PAGE; - pt.cmd.nsid = nsid; + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_LOG_PAGE); + pt.cmd.nsid = htole32(nsid); pt.cmd.cdw10 = ((payload_size/sizeof(uint32_t)) - 1) << 16; pt.cmd.cdw10 |= log_page; + pt.cmd.cdw10 = htole32(pt.cmd.cdw10); pt.buf = payload; pt.len = payload_size; pt.is_read = 1; @@ -120,6 +119,30 @@ if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "get log page request failed"); + /* Convert data to host endian */ + switch (log_page) { + case NVME_LOG_ERROR: + err_entry = (struct nvme_error_information_entry *)payload; + err_pages = payload_size / sizeof(struct nvme_error_information_entry); + for (i = 0; i < err_pages; i++) + nvme_error_information_entry_swapbytes(err_entry++); + break; + case NVME_LOG_HEALTH_INFORMATION: + 
nvme_health_information_page_swapbytes( + (struct nvme_health_information_page *)payload); + break; + case NVME_LOG_FIRMWARE_SLOT: + nvme_firmware_page_swapbytes( + (struct nvme_firmware_page *)payload); + break; + case INTEL_LOG_TEMP_STATS: + intel_log_temp_stats_swapbytes( + (struct intel_log_temp_stats *)payload); + break; + default: + break; + } + if (nvme_completion_is_error(&pt.cpl)) errx(1, "get log page request returned error"); } @@ -128,8 +151,9 @@ print_log_error(const struct nvme_controller_data *cdata __unused, void *buf, uint32_t size) { int i, nentries; + uint16_t status; + uint8_t p, sc, sct, m, dnr; struct nvme_error_information_entry *entry = buf; - struct nvme_status *status; printf("Error Information Log\n"); printf("=====================\n"); @@ -144,7 +168,14 @@ if (entry->error_count == 0) break; - status = &entry->status; + status = entry->status; + + p = NVME_STATUS_GET_P(status); + sc = NVME_STATUS_GET_SC(status); + sct = NVME_STATUS_GET_SCT(status); + m = NVME_STATUS_GET_M(status); + dnr = NVME_STATUS_GET_DNR(status); + printf("Entry %02d\n", i + 1); printf("=========\n"); printf(" Error count: %ju\n", entry->error_count); @@ -152,11 +183,11 @@ printf(" Command ID: %u\n", entry->cid); /* TODO: Export nvme_status_string structures from kernel? 
*/ printf(" Status:\n"); - printf(" Phase tag: %d\n", status->p); - printf(" Status code: %d\n", status->sc); - printf(" Status code type: %d\n", status->sct); - printf(" More: %d\n", status->m); - printf(" DNR: %d\n", status->dnr); + printf(" Phase tag: %d\n", p); + printf(" Status code: %d\n", sc); + printf(" Status code type: %d\n", sct); + printf(" More: %d\n", m); + printf(" DNR: %d\n", dnr); printf(" Error location: %u\n", entry->error_location); printf(" LBA: %ju\n", entry->lba); printf(" Namespace ID: %u\n", entry->nsid); @@ -176,23 +207,25 @@ { struct nvme_health_information_page *health = buf; char cbuf[UINT128_DIG + 1]; + uint8_t warning; int i; + warning = health->critical_warning; + printf("SMART/Health Information Log\n"); printf("============================\n"); - printf("Critical Warning State: 0x%02x\n", - health->critical_warning.raw); + printf("Critical Warning State: 0x%02x\n", warning); printf(" Available spare: %d\n", - health->critical_warning.bits.available_spare); + !!(warning & NVME_CRIT_WARN_ST_AVAILABLE_SPARE)); printf(" Temperature: %d\n", - health->critical_warning.bits.temperature); + !!(warning & NVME_CRIT_WARN_ST_TEMPERATURE)); printf(" Device reliability: %d\n", - health->critical_warning.bits.device_reliability); + !!(warning & NVME_CRIT_WARN_ST_DEVICE_RELIABILITY)); printf(" Read only: %d\n", - health->critical_warning.bits.read_only); + !!(warning & NVME_CRIT_WARN_ST_READ_ONLY)); printf(" Volatile memory backup: %d\n", - health->critical_warning.bits.volatile_memory_backup); + !!(warning & NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP)); printf("Temperature: "); print_temp(health->temperature); printf("Available spare: %u\n", @@ -225,7 +258,7 @@ printf("Warning Temp Composite Time: %d\n", health->warning_temp_time); printf("Error Temp Composite Time: %d\n", health->error_temp_time); - for (i = 0; i < 7; i++) { + for (i = 0; i < 8; i++) { if (health->temp_sensor[i] == 0) continue; printf("Temperature Sensor %d: ", i + 1); @@ -234,23 
+267,34 @@ } static void -print_log_firmware(const struct nvme_controller_data *cdata __unused, void *buf, uint32_t size __unused) +print_log_firmware(const struct nvme_controller_data *cdata, void *buf, uint32_t size __unused) { int i, slots; const char *status; struct nvme_firmware_page *fw = buf; + uint8_t afi_slot; + uint16_t oacs_fw; + uint8_t fw_num_slots; + afi_slot = fw->afi >> NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT; + afi_slot &= NVME_FIRMWARE_PAGE_AFI_SLOT_MASK; + + oacs_fw = (cdata->oacs >> NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT) & + NVME_CTRLR_DATA_OACS_FIRMWARE_MASK; + fw_num_slots = (cdata->frmw >> NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT) & + NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK; + printf("Firmware Slot Log\n"); printf("=================\n"); - if (cdata->oacs.firmware == 0) + if (oacs_fw == 0) slots = 1; else - slots = MIN(cdata->frmw.num_slots, MAX_FW_SLOTS); + slots = MIN(fw_num_slots, MAX_FW_SLOTS); for (i = 0; i < slots; i++) { printf("Slot %d: ", i + 1); - if (fw->afi.slot == i + 1) + if (afi_slot == i + 1) status = " Active"; else status = "Inactive"; @@ -868,7 +912,8 @@ int fd, nsid; int log_page = 0, pageflag = false; int binflag = false, hexflag = false, ns_specified; - char ch, *p; + int opt; + char *p; char cname[64]; uint32_t size; void *buf; @@ -876,9 +921,10 @@ struct logpage_function *f; struct nvme_controller_data cdata; print_fn_t print_fn; + uint8_t ns_smart; - while ((ch = getopt(argc, argv, "bp:xv:")) != -1) { - switch (ch) { + while ((opt = getopt(argc, argv, "bp:xv:")) != -1) { + switch (opt) { case 'b': binflag = true; break; @@ -928,6 +974,9 @@ read_controller_data(fd, &cdata); + ns_smart = (cdata.lpa >> NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT) & + NVME_CTRLR_DATA_LPA_NS_SMART_MASK; + /* * The log page attribtues indicate whether or not the controller * supports the SMART/Health information log page on a per @@ -937,7 +986,7 @@ if (log_page != NVME_LOG_HEALTH_INFORMATION) errx(1, "log page %d valid only at controller level", log_page); - if 
(cdata.lpa.ns_smart == 0) + if (ns_smart == 0) errx(1, "controller does not support per namespace " "smart/health information"); Index: head/sbin/nvmecontrol/nvmecontrol.c =================================================================== --- head/sbin/nvmecontrol/nvmecontrol.c +++ head/sbin/nvmecontrol/nvmecontrol.c @@ -146,8 +146,8 @@ struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_IDENTIFY; - pt.cmd.cdw10 = 1; + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY); + pt.cmd.cdw10 = htole32(1); pt.buf = cdata; pt.len = sizeof(*cdata); pt.is_read = 1; @@ -155,6 +155,9 @@ if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); + /* Convert data to host endian */ + nvme_controller_data_swapbytes(cdata); + if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); } @@ -165,14 +168,17 @@ struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_IDENTIFY; - pt.cmd.nsid = nsid; + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY); + pt.cmd.nsid = htole32(nsid); pt.buf = nsdata; pt.len = sizeof(*nsdata); pt.is_read = 1; if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "identify request failed"); + + /* Convert data to host endian */ + nvme_namespace_data_swapbytes(nsdata); if (nvme_completion_is_error(&pt.cpl)) errx(1, "identify request returned error"); Index: head/sbin/nvmecontrol/perftest.c =================================================================== --- head/sbin/nvmecontrol/perftest.c +++ head/sbin/nvmecontrol/perftest.c @@ -81,7 +81,7 @@ { struct nvme_io_test io_test; int fd; - char ch; + int opt; char *p; u_long ioctl_cmd = NVME_IO_TEST; bool nflag, oflag, sflag, tflag; @@ -91,8 +91,8 @@ memset(&io_test, 0, sizeof(io_test)); - while ((ch = getopt(argc, argv, "f:i:n:o:ps:t:")) != -1) { - switch (ch) { + while ((opt = getopt(argc, argv, "f:i:n:o:ps:t:")) != -1) { + switch (opt) { case 'f': if (!strcmp(optarg, "refthread")) io_test.flags |= 
NVME_TEST_FLAG_REFTHREAD; Index: head/sbin/nvmecontrol/power.c =================================================================== --- head/sbin/nvmecontrol/power.c +++ head/sbin/nvmecontrol/power.c @@ -56,22 +56,32 @@ power_list_one(int i, struct nvme_power_state *nps) { int mpower, apower, ipower; + uint8_t mps, nops, aps, apw; + mps = (nps->mps_nops >> NVME_PWR_ST_MPS_SHIFT) & + NVME_PWR_ST_MPS_MASK; + nops = (nps->mps_nops >> NVME_PWR_ST_NOPS_SHIFT) & + NVME_PWR_ST_NOPS_MASK; + apw = (nps->apw_aps >> NVME_PWR_ST_APW_SHIFT) & + NVME_PWR_ST_APW_MASK; + aps = (nps->apw_aps >> NVME_PWR_ST_APS_SHIFT) & + NVME_PWR_ST_APS_MASK; + mpower = nps->mp; - if (nps->mps == 0) + if (mps == 0) mpower *= 100; ipower = nps->idlp; if (nps->ips == 1) ipower *= 100; apower = nps->actp; - if (nps->aps == 1) + if (aps == 1) apower *= 100; printf("%2d: %2d.%04dW%c %3d.%03dms %3d.%03dms %2d %2d %2d %2d %2d.%04dW %2d.%04dW %d\n", i, mpower / 10000, mpower % 10000, - nps->nops ? '*' : ' ', nps->enlat / 1000, nps->enlat % 1000, + nops ? '*' : ' ', nps->enlat / 1000, nps->enlat % 1000, nps->exlat / 1000, nps->exlat % 1000, nps->rrt, nps->rrl, nps->rwt, nps->rwl, ipower / 10000, ipower % 10000, - apower / 10000, apower % 10000, nps->apw); + apower / 10000, apower % 10000, apw); } static void @@ -94,9 +104,9 @@ p = perm ? 
(1u << 31) : 0; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_SET_FEATURES; - pt.cmd.cdw10 = NVME_FEAT_POWER_MANAGEMENT | p; - pt.cmd.cdw11 = power_val | (workload << 5); + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_SET_FEATURES); + pt.cmd.cdw10 = htole32(NVME_FEAT_POWER_MANAGEMENT | p); + pt.cmd.cdw11 = htole32(power_val | (workload << 5)); if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "set feature power mgmt request failed"); @@ -111,8 +121,8 @@ struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = NVME_OPC_GET_FEATURES; - pt.cmd.cdw10 = NVME_FEAT_POWER_MANAGEMENT; + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_FEATURES); + pt.cmd.cdw10 = htole32(NVME_FEAT_POWER_MANAGEMENT); if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) err(1, "set feature power mgmt request failed"); Index: head/sbin/nvmecontrol/wdc.c =================================================================== --- head/sbin/nvmecontrol/wdc.c +++ head/sbin/nvmecontrol/wdc.c @@ -81,10 +81,10 @@ struct nvme_pt_command pt; memset(&pt, 0, sizeof(pt)); - pt.cmd.opc = opcode; - pt.cmd.cdw10 = len / sizeof(uint32_t); /* - 1 like all the others ??? */ - pt.cmd.cdw11 = off / sizeof(uint32_t); - pt.cmd.cdw12 = cmd; + pt.cmd.opc_fuse = NVME_CMD_SET_OPC(opcode); + pt.cmd.cdw10 = htole32(len / sizeof(uint32_t)); /* - 1 like all the others ??? 
*/ + pt.cmd.cdw11 = htole32(off / sizeof(uint32_t)); + pt.cmd.cdw12 = htole32(cmd); pt.buf = buffer; pt.len = buflen; pt.is_read = 1; Index: head/sys/cam/nvme/nvme_all.c =================================================================== --- head/sys/cam/nvme/nvme_all.c +++ head/sys/cam/nvme/nvme_all.c @@ -70,14 +70,14 @@ uint32_t cdw14, uint32_t cdw15) { bzero(&nvmeio->cmd, sizeof(struct nvme_command)); - nvmeio->cmd.opc = cmd; - nvmeio->cmd.nsid = nsid; - nvmeio->cmd.cdw10 = cdw10; - nvmeio->cmd.cdw11 = cdw11; - nvmeio->cmd.cdw12 = cdw12; - nvmeio->cmd.cdw13 = cdw13; - nvmeio->cmd.cdw14 = cdw14; - nvmeio->cmd.cdw15 = cdw15; + nvmeio->cmd.opc_fuse = NVME_CMD_SET_OPC(cmd); + nvmeio->cmd.nsid = htole32(nsid); + nvmeio->cmd.cdw10 = htole32(cdw10); + nvmeio->cmd.cdw11 = htole32(cdw11); + nvmeio->cmd.cdw12 = htole32(cdw12); + nvmeio->cmd.cdw13 = htole32(cdw13); + nvmeio->cmd.cdw14 = htole32(cdw14); + nvmeio->cmd.cdw15 = htole32(cdw15); } int @@ -118,24 +118,32 @@ const char * nvme_op_string(const struct nvme_command *cmd) { - if (cmd->opc > nitems(nvme_opc2str)) + uint8_t opc; + + opc = (cmd->opc_fuse >> NVME_CMD_OPC_SHIFT) & NVME_CMD_OPC_MASK; + if (opc > nitems(nvme_opc2str)) return "UNKNOWN"; - return nvme_opc2str[cmd->opc]; + return nvme_opc2str[opc]; } const char * nvme_cmd_string(const struct nvme_command *cmd, char *cmd_string, size_t len) { + uint8_t opc, fuse; + + opc = (cmd->opc_fuse >> NVME_CMD_OPC_SHIFT) & NVME_CMD_OPC_MASK; + fuse = (cmd->opc_fuse >> NVME_CMD_FUSE_SHIFT) & NVME_CMD_FUSE_MASK; /* * cid, rsvd areas and mptr not printed, since they are used * only internally by the SIM. 
*/ snprintf(cmd_string, len, "opc=%x fuse=%x nsid=%x prp1=%llx prp2=%llx cdw=%x %x %x %x %x %x", - cmd->opc, cmd->fuse, cmd->nsid, + opc, fuse, cmd->nsid, (unsigned long long)cmd->prp1, (unsigned long long)cmd->prp2, - cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14, cmd->cdw15); + cmd->cdw10, cmd->cdw11, cmd->cdw12, + cmd->cdw13, cmd->cdw14, cmd->cdw15); return cmd_string; } Index: head/sys/cam/nvme/nvme_da.c =================================================================== --- head/sys/cam/nvme/nvme_da.c +++ head/sys/cam/nvme/nvme_da.c @@ -676,6 +676,7 @@ const struct nvme_namespace_data *nsd; const struct nvme_controller_data *cd; char announce_buf[80]; + uint8_t flbas_fmt, lbads, vwc_present; u_int maxio; int quirks; @@ -744,13 +745,19 @@ else if (maxio > MAXPHYS) maxio = MAXPHYS; /* for safety */ disk->d_maxsize = maxio; - disk->d_sectorsize = 1 << nsd->lbaf[nsd->flbas.format].lbads; + flbas_fmt = (nsd->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; + lbads = (nsd->lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & + NVME_NS_DATA_LBAF_LBADS_MASK; + disk->d_sectorsize = 1 << lbads; disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze); disk->d_delmaxsize = disk->d_mediasize; disk->d_flags = DISKFLAG_DIRECT_COMPLETION; // if (cd->oncs.dsm) // XXX broken? 
disk->d_flags |= DISKFLAG_CANDELETE; - if (cd->vwc.present) + vwc_present = (cd->vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) & + NVME_CTRLR_DATA_VWC_PRESENT_MASK; + if (vwc_present) disk->d_flags |= DISKFLAG_CANFLUSHCACHE; if ((cpi.hba_misc & PIM_UNMAPPED) != 0) { disk->d_flags |= DISKFLAG_UNMAPPED_BIO; @@ -905,9 +912,9 @@ return; } dsm_range->length = - bp->bio_bcount / softc->disk->d_sectorsize; + htole32(bp->bio_bcount / softc->disk->d_sectorsize); dsm_range->starting_lba = - bp->bio_offset / softc->disk->d_sectorsize; + htole64(bp->bio_offset / softc->disk->d_sectorsize); bp->bio_driver2 = dsm_range; nda_nvme_trim(softc, &start_ccb->nvmeio, dsm_range, 1); start_ccb->ccb_h.ccb_state = NDA_CCB_TRIM; Index: head/sys/conf/files =================================================================== --- head/sys/conf/files +++ head/sys/conf/files @@ -2535,6 +2535,17 @@ dev/nsp/nsp.c optional nsp dev/nsp/nsp_pccard.c optional nsp pccard dev/null/null.c standard +dev/nvd/nvd.c optional nvd nvme +dev/nvme/nvme.c optional nvme +dev/nvme/nvme_ctrlr.c optional nvme +dev/nvme/nvme_ctrlr_cmd.c optional nvme +dev/nvme/nvme_ns.c optional nvme +dev/nvme/nvme_ns_cmd.c optional nvme +dev/nvme/nvme_qpair.c optional nvme +dev/nvme/nvme_sim.c optional nvme scbus +dev/nvme/nvme_sysctl.c optional nvme +dev/nvme/nvme_test.c optional nvme +dev/nvme/nvme_util.c optional nvme dev/oce/oce_hw.c optional oce pci dev/oce/oce_if.c optional oce pci dev/oce/oce_mbox.c optional oce pci Index: head/sys/dev/mpr/mpr_sas.c =================================================================== --- head/sys/dev/mpr/mpr_sas.c +++ head/sys/dev/mpr/mpr_sas.c @@ -1839,7 +1839,7 @@ /* Build NVMe DSM command */ c = (struct nvme_command *) req->NVMe_Command; - c->opc = NVME_OPC_DATASET_MANAGEMENT; + c->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT); c->nsid = htole32(csio->ccb_h.target_lun + 1); c->cdw10 = htole32(ndesc - 1); c->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE); @@ -2263,22 +2263,26 @@ * Returns 
appropriate scsi_status */ static u8 -mprsas_nvme_trans_status_code(struct nvme_status nvme_status, +mprsas_nvme_trans_status_code(uint16_t nvme_status, struct mpr_command *cm) { u8 status = MPI2_SCSI_STATUS_GOOD; int skey, asc, ascq; union ccb *ccb = cm->cm_complete_data; int returned_sense_len; + uint8_t sct, sc; + sct = NVME_STATUS_GET_SCT(nvme_status); + sc = NVME_STATUS_GET_SC(nvme_status); + status = MPI2_SCSI_STATUS_CHECK_CONDITION; skey = SSD_KEY_ILLEGAL_REQUEST; asc = SCSI_ASC_NO_SENSE; ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE; - switch (nvme_status.sct) { + switch (sct) { case NVME_SCT_GENERIC: - switch (nvme_status.sc) { + switch (sc) { case NVME_SC_SUCCESS: status = MPI2_SCSI_STATUS_GOOD; skey = SSD_KEY_NO_SENSE; @@ -2351,7 +2355,7 @@ } break; case NVME_SCT_COMMAND_SPECIFIC: - switch (nvme_status.sc) { + switch (sc) { case NVME_SC_INVALID_FORMAT: status = MPI2_SCSI_STATUS_CHECK_CONDITION; skey = SSD_KEY_ILLEGAL_REQUEST; @@ -2367,7 +2371,7 @@ } break; case NVME_SCT_MEDIA_ERROR: - switch (nvme_status.sc) { + switch (sc) { case NVME_SC_WRITE_FAULTS: status = MPI2_SCSI_STATUS_CHECK_CONDITION; skey = SSD_KEY_MEDIUM_ERROR; Index: head/sys/dev/nvme/nvme.h =================================================================== --- head/sys/dev/nvme/nvme.h +++ head/sys/dev/nvme/nvme.h @@ -36,6 +36,7 @@ #endif #include +#include #define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command) #define NVME_RESET_CONTROLLER _IO('n', 1) @@ -59,153 +60,264 @@ /* Cap nvme to 1MB transfers driver explodes with larger sizes */ #define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? 
MAXPHYS : (1<<20)) -union cap_lo_register { - uint32_t raw; - struct { - /** maximum queue entries supported */ - uint32_t mqes : 16; +/* Register field definitions */ +#define NVME_CAP_LO_REG_MQES_SHIFT (0) +#define NVME_CAP_LO_REG_MQES_MASK (0xFFFF) +#define NVME_CAP_LO_REG_CQR_SHIFT (16) +#define NVME_CAP_LO_REG_CQR_MASK (0x1) +#define NVME_CAP_LO_REG_AMS_SHIFT (17) +#define NVME_CAP_LO_REG_AMS_MASK (0x3) +#define NVME_CAP_LO_REG_TO_SHIFT (24) +#define NVME_CAP_LO_REG_TO_MASK (0xFF) - /** contiguous queues required */ - uint32_t cqr : 1; +#define NVME_CAP_HI_REG_DSTRD_SHIFT (0) +#define NVME_CAP_HI_REG_DSTRD_MASK (0xF) +#define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5) +#define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1) +#define NVME_CAP_HI_REG_MPSMIN_SHIFT (16) +#define NVME_CAP_HI_REG_MPSMIN_MASK (0xF) +#define NVME_CAP_HI_REG_MPSMAX_SHIFT (20) +#define NVME_CAP_HI_REG_MPSMAX_MASK (0xF) - /** arbitration mechanism supported */ - uint32_t ams : 2; +#define NVME_CC_REG_EN_SHIFT (0) +#define NVME_CC_REG_EN_MASK (0x1) +#define NVME_CC_REG_CSS_SHIFT (4) +#define NVME_CC_REG_CSS_MASK (0x7) +#define NVME_CC_REG_MPS_SHIFT (7) +#define NVME_CC_REG_MPS_MASK (0xF) +#define NVME_CC_REG_AMS_SHIFT (11) +#define NVME_CC_REG_AMS_MASK (0x7) +#define NVME_CC_REG_SHN_SHIFT (14) +#define NVME_CC_REG_SHN_MASK (0x3) +#define NVME_CC_REG_IOSQES_SHIFT (16) +#define NVME_CC_REG_IOSQES_MASK (0xF) +#define NVME_CC_REG_IOCQES_SHIFT (20) +#define NVME_CC_REG_IOCQES_MASK (0xF) - uint32_t reserved1 : 5; +#define NVME_CSTS_REG_RDY_SHIFT (0) +#define NVME_CSTS_REG_RDY_MASK (0x1) +#define NVME_CSTS_REG_CFS_SHIFT (1) +#define NVME_CSTS_REG_CFS_MASK (0x1) +#define NVME_CSTS_REG_SHST_SHIFT (2) +#define NVME_CSTS_REG_SHST_MASK (0x3) - /** timeout */ - uint32_t to : 8; - } bits __packed; -} __packed; +#define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK) -_Static_assert(sizeof(union cap_lo_register) == 4, "bad size for cap_lo_register"); +#define NVME_AQA_REG_ASQS_SHIFT 
(0) +#define NVME_AQA_REG_ASQS_MASK (0xFFF) +#define NVME_AQA_REG_ACQS_SHIFT (16) +#define NVME_AQA_REG_ACQS_MASK (0xFFF) -union cap_hi_register { - uint32_t raw; - struct { - /** doorbell stride */ - uint32_t dstrd : 4; +/* Command field definitions */ - uint32_t reserved3 : 1; +#define NVME_CMD_OPC_SHIFT (0) +#define NVME_CMD_OPC_MASK (0xFF) +#define NVME_CMD_FUSE_SHIFT (8) +#define NVME_CMD_FUSE_MASK (0x3) - /** command sets supported */ - uint32_t css_nvm : 1; +#define NVME_CMD_SET_OPC(opc) (htole16(((opc) & NVME_CMD_OPC_MASK) << NVME_CMD_OPC_SHIFT)) - uint32_t css_reserved : 3; - uint32_t reserved2 : 7; +#define NVME_STATUS_P_SHIFT (0) +#define NVME_STATUS_P_MASK (0x1) +#define NVME_STATUS_SC_SHIFT (1) +#define NVME_STATUS_SC_MASK (0xFF) +#define NVME_STATUS_SCT_SHIFT (9) +#define NVME_STATUS_SCT_MASK (0x7) +#define NVME_STATUS_M_SHIFT (14) +#define NVME_STATUS_M_MASK (0x1) +#define NVME_STATUS_DNR_SHIFT (15) +#define NVME_STATUS_DNR_MASK (0x1) - /** memory page size minimum */ - uint32_t mpsmin : 4; +#define NVME_STATUS_GET_P(st) (((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK) +#define NVME_STATUS_GET_SC(st) (((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK) +#define NVME_STATUS_GET_SCT(st) (((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK) +#define NVME_STATUS_GET_M(st) (((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK) +#define NVME_STATUS_GET_DNR(st) (((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK) - /** memory page size maximum */ - uint32_t mpsmax : 4; +#define NVME_PWR_ST_MPS_SHIFT (0) +#define NVME_PWR_ST_MPS_MASK (0x1) +#define NVME_PWR_ST_NOPS_SHIFT (1) +#define NVME_PWR_ST_NOPS_MASK (0x1) +#define NVME_PWR_ST_RRT_SHIFT (0) +#define NVME_PWR_ST_RRT_MASK (0x1F) +#define NVME_PWR_ST_RRL_SHIFT (0) +#define NVME_PWR_ST_RRL_MASK (0x1F) +#define NVME_PWR_ST_RWT_SHIFT (0) +#define NVME_PWR_ST_RWT_MASK (0x1F) +#define NVME_PWR_ST_RWL_SHIFT (0) +#define NVME_PWR_ST_RWL_MASK (0x1F) +#define NVME_PWR_ST_IPS_SHIFT (6) +#define 
NVME_PWR_ST_IPS_MASK (0x3) +#define NVME_PWR_ST_APW_SHIFT (0) +#define NVME_PWR_ST_APW_MASK (0x7) +#define NVME_PWR_ST_APS_SHIFT (6) +#define NVME_PWR_ST_APS_MASK (0x3) - uint32_t reserved1 : 8; - } bits __packed; -} __packed; +/** OACS - optional admin command support */ +/* supports security send/receive commands */ +#define NVME_CTRLR_DATA_OACS_SECURITY_SHIFT (0) +#define NVME_CTRLR_DATA_OACS_SECURITY_MASK (0x1) +/* supports format nvm command */ +#define NVME_CTRLR_DATA_OACS_FORMAT_SHIFT (1) +#define NVME_CTRLR_DATA_OACS_FORMAT_MASK (0x1) +/* supports firmware activate/download commands */ +#define NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT (2) +#define NVME_CTRLR_DATA_OACS_FIRMWARE_MASK (0x1) +/* supports namespace management commands */ +#define NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT (3) +#define NVME_CTRLR_DATA_OACS_NSMGMT_MASK (0x1) -_Static_assert(sizeof(union cap_hi_register) == 4, "bad size of cap_hi_register"); +/** firmware updates */ +/* first slot is read-only */ +#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT (0) +#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK (0x1) +/* number of firmware slots */ +#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1) +#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7) -union cc_register { - uint32_t raw; - struct { - /** enable */ - uint32_t en : 1; +/** log page attributes */ +/* per namespace smart/health log page */ +#define NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT (0) +#define NVME_CTRLR_DATA_LPA_NS_SMART_MASK (0x1) - uint32_t reserved1 : 3; +/** AVSCC - admin vendor specific command configuration */ +/* admin vendor specific commands use spec format */ +#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_SHIFT (0) +#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_MASK (0x1) - /** i/o command set selected */ - uint32_t css : 3; +/** Autonomous Power State Transition Attributes */ +/* Autonomous Power State Transitions supported */ +#define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0) +#define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1) - /** memory page size */ 
- uint32_t mps : 4; +/** submission queue entry size */ +#define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0) +#define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF) +#define NVME_CTRLR_DATA_SQES_MAX_SHIFT (4) +#define NVME_CTRLR_DATA_SQES_MAX_MASK (0xF) - /** arbitration mechanism selected */ - uint32_t ams : 3; +/** completion queue entry size */ +#define NVME_CTRLR_DATA_CQES_MIN_SHIFT (0) +#define NVME_CTRLR_DATA_CQES_MIN_MASK (0xF) +#define NVME_CTRLR_DATA_CQES_MAX_SHIFT (4) +#define NVME_CTRLR_DATA_CQES_MAX_MASK (0xF) - /** shutdown notification */ - uint32_t shn : 2; +/** optional nvm command support */ +#define NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT (0) +#define NVME_CTRLR_DATA_ONCS_COMPARE_MASK (0x1) +#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT (1) +#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK (0x1) +#define NVME_CTRLR_DATA_ONCS_DSM_SHIFT (2) +#define NVME_CTRLR_DATA_ONCS_DSM_MASK (0x1) - /** i/o submission queue entry size */ - uint32_t iosqes : 4; +/** volatile write cache */ +#define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0) +#define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1) - /** i/o completion queue entry size */ - uint32_t iocqes : 4; +/** namespace features */ +/* thin provisioning */ +#define NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT (0) +#define NVME_NS_DATA_NSFEAT_THIN_PROV_MASK (0x1) - uint32_t reserved2 : 8; - } bits __packed; -} __packed; +/** formatted lba size */ +#define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0) +#define NVME_NS_DATA_FLBAS_FORMAT_MASK (0xF) +#define NVME_NS_DATA_FLBAS_EXTENDED_SHIFT (4) +#define NVME_NS_DATA_FLBAS_EXTENDED_MASK (0x1) -_Static_assert(sizeof(union cc_register) == 4, "bad size for cc_register"); +/** metadata capabilities */ +/* metadata can be transferred as part of data prp list */ +#define NVME_NS_DATA_MC_EXTENDED_SHIFT (0) +#define NVME_NS_DATA_MC_EXTENDED_MASK (0x1) +/* metadata can be transferred with separate metadata pointer */ +#define NVME_NS_DATA_MC_POINTER_SHIFT (1) +#define NVME_NS_DATA_MC_POINTER_MASK (0x1) -enum shn_value { - 
NVME_SHN_NORMAL = 0x1, - NVME_SHN_ABRUPT = 0x2, -}; +/** end-to-end data protection capabilities */ +/* protection information type 1 */ +#define NVME_NS_DATA_DPC_PIT1_SHIFT (0) +#define NVME_NS_DATA_DPC_PIT1_MASK (0x1) +/* protection information type 2 */ +#define NVME_NS_DATA_DPC_PIT2_SHIFT (1) +#define NVME_NS_DATA_DPC_PIT2_MASK (0x1) +/* protection information type 3 */ +#define NVME_NS_DATA_DPC_PIT3_SHIFT (2) +#define NVME_NS_DATA_DPC_PIT3_MASK (0x1) +/* first eight bytes of metadata */ +#define NVME_NS_DATA_DPC_MD_START_SHIFT (3) +#define NVME_NS_DATA_DPC_MD_START_MASK (0x1) +/* last eight bytes of metadata */ +#define NVME_NS_DATA_DPC_MD_END_SHIFT (4) +#define NVME_NS_DATA_DPC_MD_END_MASK (0x1) -union csts_register { - uint32_t raw; - struct { - /** ready */ - uint32_t rdy : 1; +/** end-to-end data protection type settings */ +/* protection information type */ +#define NVME_NS_DATA_DPS_PIT_SHIFT (0) +#define NVME_NS_DATA_DPS_PIT_MASK (0x7) +/* 1 == protection info transferred at start of metadata */ +/* 0 == protection info transferred at end of metadata */ +#define NVME_NS_DATA_DPS_MD_START_SHIFT (3) +#define NVME_NS_DATA_DPS_MD_START_MASK (0x1) - /** controller fatal status */ - uint32_t cfs : 1; +/** lba format support */ +/* metadata size */ +#define NVME_NS_DATA_LBAF_MS_SHIFT (0) +#define NVME_NS_DATA_LBAF_MS_MASK (0xFFFF) +/* lba data size */ +#define NVME_NS_DATA_LBAF_LBADS_SHIFT (16) +#define NVME_NS_DATA_LBAF_LBADS_MASK (0xFF) +/* relative performance */ +#define NVME_NS_DATA_LBAF_RP_SHIFT (24) +#define NVME_NS_DATA_LBAF_RP_MASK (0x3) - /** shutdown status */ - uint32_t shst : 2; +enum nvme_critical_warning_state { + NVME_CRIT_WARN_ST_AVAILABLE_SPARE = 0x1, + NVME_CRIT_WARN_ST_TEMPERATURE = 0x2, + NVME_CRIT_WARN_ST_DEVICE_RELIABILITY = 0x4, + NVME_CRIT_WARN_ST_READ_ONLY = 0x8, + NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP = 0x10, +}; +#define NVME_CRIT_WARN_ST_RESERVED_MASK (0xE0) - uint32_t reserved1 : 28; - } bits __packed; -} __packed; +/* slot for 
current FW */ +#define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0) +#define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7) -_Static_assert(sizeof(union csts_register) == 4, "bad size for csts_register"); +/* CC register SHN field values */ +enum shn_value { + NVME_SHN_NORMAL = 0x1, + NVME_SHN_ABRUPT = 0x2, +}; +/* CSTS register SHST field values */ enum shst_value { NVME_SHST_NORMAL = 0x0, NVME_SHST_OCCURRING = 0x1, NVME_SHST_COMPLETE = 0x2, }; -union aqa_register { - uint32_t raw; - struct { - /** admin submission queue size */ - uint32_t asqs : 12; - - uint32_t reserved1 : 4; - - /** admin completion queue size */ - uint32_t acqs : 12; - - uint32_t reserved2 : 4; - } bits __packed; -} __packed; - -_Static_assert(sizeof(union aqa_register) == 4, "bad size for aqa_resgister"); - struct nvme_registers { /** controller capabilities */ - union cap_lo_register cap_lo; - union cap_hi_register cap_hi; + uint32_t cap_lo; + uint32_t cap_hi; uint32_t vs; /* version */ uint32_t intms; /* interrupt mask set */ uint32_t intmc; /* interrupt mask clear */ /** controller configuration */ - union cc_register cc; + uint32_t cc; uint32_t reserved1; /** controller status */ - union csts_register csts; + uint32_t csts; uint32_t reserved2; /** admin queue attributes */ - union aqa_register aqa; + uint32_t aqa; uint64_t asq; /* admin submission queue base addr */ uint64_t acq; /* admin completion queue base addr */ @@ -222,9 +334,7 @@ struct nvme_command { /* dword 0 */ - uint16_t opc : 8; /* opcode */ - uint16_t fuse : 2; /* fused operation */ - uint16_t rsvd1 : 6; + uint16_t opc_fuse; /* opcode, fused operation */ uint16_t cid; /* command identifier */ /* dword 1 */ @@ -254,18 +364,6 @@ _Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command"); -struct nvme_status { - - uint16_t p : 1; /* phase tag */ - uint16_t sc : 8; /* status code */ - uint16_t sct : 3; /* status code type */ - uint16_t rsvd2 : 2; - uint16_t m : 1; /* more */ - uint16_t dnr : 1; /* do not retry */ -} 
__packed; - -_Static_assert(sizeof(struct nvme_status) == 2, "bad size for nvme_status"); - struct nvme_completion { /* dword 0 */ @@ -280,7 +378,7 @@ /* dword 3 */ uint16_t cid; /* command identifier */ - struct nvme_status status; + uint16_t status; } __packed; _Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion"); @@ -435,27 +533,22 @@ /** Maximum Power */ uint16_t mp; /* Maximum Power */ uint8_t ps_rsvd1; - uint8_t mps : 1; /* Max Power Scale */ - uint8_t nops : 1; /* Non-Operational State */ - uint8_t ps_rsvd2 : 6; + uint8_t mps_nops; /* Max Power Scale, Non-Operational State */ + uint32_t enlat; /* Entry Latency */ uint32_t exlat; /* Exit Latency */ - uint8_t rrt : 5; /* Relative Read Throughput */ - uint8_t ps_rsvd3 : 3; - uint8_t rrl : 5; /* Relative Read Latency */ - uint8_t ps_rsvd4 : 3; - uint8_t rwt : 5; /* Relative Write Throughput */ - uint8_t ps_rsvd5 : 3; - uint8_t rwl : 5; /* Relative Write Latency */ - uint8_t ps_rsvd6 : 3; + + uint8_t rrt; /* Relative Read Throughput */ + uint8_t rrl; /* Relative Read Latency */ + uint8_t rwt; /* Relative Write Throughput */ + uint8_t rwl; /* Relative Write Latency */ + uint16_t idlp; /* Idle Power */ - uint8_t ps_rsvd7 : 6; - uint8_t ips : 2; /* Idle Power Scale */ + uint8_t ips; /* Idle Power Scale */ uint8_t ps_rsvd8; + uint16_t actp; /* Active Power */ - uint8_t apw : 3; /* Active Power Workload */ - uint8_t ps_rsvd9 : 3; - uint8_t aps : 2; /* Active Power Scale */ + uint8_t apw_aps; /* Active Power Workload, Active Power Scale */ uint8_t ps_rsvd10[9]; } __packed; @@ -524,22 +617,8 @@ /* bytes 256-511: admin command set attributes */ /** optional admin command support */ - struct { - /* supports security send/receive commands */ - uint16_t security : 1; + uint16_t oacs; - /* supports format nvm command */ - uint16_t format : 1; - - /* supports firmware activate/download commands */ - uint16_t firmware : 1; - - /* supports namespace management commands */ - uint16_t nsmgmt : 
1; - - uint16_t oacs_rsvd : 12; - } __packed oacs; - /** abort command limit */ uint8_t acl; @@ -547,24 +626,11 @@ uint8_t aerl; /** firmware updates */ - struct { - /* first slot is read-only */ - uint8_t slot1_ro : 1; + uint8_t frmw; - /* number of firmware slots */ - uint8_t num_slots : 3; - - uint8_t frmw_rsvd : 4; - } __packed frmw; - /** log page attributes */ - struct { - /* per namespace smart/health log page */ - uint8_t ns_smart : 1; + uint8_t lpa; - uint8_t lpa_rsvd : 7; - } __packed lpa; - /** error log page entries */ uint8_t elpe; @@ -572,21 +638,11 @@ uint8_t npss; /** admin vendor specific command configuration */ - struct { - /* admin vendor specific commands use spec format */ - uint8_t spec_format : 1; + uint8_t avscc; - uint8_t avscc_rsvd : 7; - } __packed avscc; - /** Autonomous Power State Transition Attributes */ - struct { - /* Autonmous Power State Transitions supported */ - uint8_t apst_supp : 1; + uint8_t apsta; - uint8_t apsta_rsvd : 7; - } __packed apsta; - /** Warning Composite Temperature Threshold */ uint16_t wctemp; @@ -636,20 +692,14 @@ /** Sanitize Capabilities */ uint32_t sanicap; /* Really a bitfield */ - uint8_t reserved3[180]; + uint8_t reserved3[180]; /* bytes 512-703: nvm command set attributes */ /** submission queue entry size */ - struct { - uint8_t min : 4; - uint8_t max : 4; - } __packed sqes; + uint8_t sqes; /** completion queue entry size */ - struct { - uint8_t min : 4; - uint8_t max : 4; - } __packed cqes; + uint8_t cqes; /** Maximum Outstanding Commands */ uint16_t maxcmd; @@ -658,12 +708,7 @@ uint32_t nn; /** optional nvm command support */ - struct { - uint16_t compare : 1; - uint16_t write_unc : 1; - uint16_t dsm: 1; - uint16_t reserved: 13; - } __packed oncs; + uint16_t oncs; /** fused operation support */ uint16_t fuses; @@ -672,10 +717,7 @@ uint8_t fna; /** volatile write cache */ - struct { - uint8_t present : 1; - uint8_t reserved : 7; - } __packed vwc; + uint8_t vwc; /* TODO: flesh out remaining nvm 
command set attributes */ uint8_t reserved5[178]; @@ -704,79 +746,28 @@ uint64_t nuse; /** namespace features */ - struct { - /** thin provisioning */ - uint8_t thin_prov : 1; - uint8_t reserved1 : 7; - } __packed nsfeat; + uint8_t nsfeat; /** number of lba formats */ uint8_t nlbaf; /** formatted lba size */ - struct { - uint8_t format : 4; - uint8_t extended : 1; - uint8_t reserved2 : 3; - } __packed flbas; + uint8_t flbas; /** metadata capabilities */ - struct { - /* metadata can be transferred as part of data prp list */ - uint8_t extended : 1; + uint8_t mc; - /* metadata can be transferred with separate metadata pointer */ - uint8_t pointer : 1; - - uint8_t reserved3 : 6; - } __packed mc; - /** end-to-end data protection capabilities */ - struct { - /* protection information type 1 */ - uint8_t pit1 : 1; + uint8_t dpc; - /* protection information type 2 */ - uint8_t pit2 : 1; - - /* protection information type 3 */ - uint8_t pit3 : 1; - - /* first eight bytes of metadata */ - uint8_t md_start : 1; - - /* last eight bytes of metadata */ - uint8_t md_end : 1; - } __packed dpc; - /** end-to-end data protection type settings */ - struct { - /* protection information type */ - uint8_t pit : 3; + uint8_t dps; - /* 1 == protection info transferred at start of metadata */ - /* 0 == protection info transferred at end of metadata */ - uint8_t md_start : 1; - - uint8_t reserved4 : 4; - } __packed dps; - uint8_t reserved5[98]; /** lba format support */ - struct { - /** metadata size */ - uint32_t ms : 16; + uint32_t lbaf[16]; - /** lba data size */ - uint32_t lbads : 8; - - /** relative performance */ - uint32_t rp : 2; - - uint32_t reserved6 : 6; - } __packed lbaf[16]; - uint8_t reserved6[192]; uint8_t vendor_specific[3712]; @@ -818,7 +809,7 @@ uint64_t error_count; uint16_t sqid; uint16_t cid; - struct nvme_status status; + uint16_t status; uint16_t error_location; uint64_t lba; uint32_t nsid; @@ -828,26 +819,9 @@ _Static_assert(sizeof(struct 
nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry"); -union nvme_critical_warning_state { - - uint8_t raw; - - struct { - uint8_t available_spare : 1; - uint8_t temperature : 1; - uint8_t device_reliability : 1; - uint8_t read_only : 1; - uint8_t volatile_memory_backup : 1; - uint8_t reserved : 3; - } __packed bits; -} __packed; - -_Static_assert(sizeof(union nvme_critical_warning_state) == 1, "bad size for nvme_critical_warning_state"); - struct nvme_health_information_page { - union nvme_critical_warning_state critical_warning; - + uint8_t critical_warning; uint16_t temperature; uint8_t available_spare; uint8_t available_spare_threshold; @@ -884,11 +858,7 @@ struct nvme_firmware_page { - struct { - uint8_t slot : 3; /* slot for current FW */ - uint8_t reserved : 5; - } __packed afi; - + uint8_t afi; uint8_t reserved[7]; uint64_t revision[7]; /* revisions for 7 slots */ uint8_t reserved2[448]; @@ -987,7 +957,7 @@ }; #define nvme_completion_is_error(cpl) \ - ((cpl)->status.sc != 0 || (cpl)->status.sct != 0) + (NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0) void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen); @@ -1087,19 +1057,19 @@ void nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid) { - cmd->opc = NVME_OPC_FLUSH; - cmd->nsid = nsid; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_FLUSH); + cmd->nsid = htole32(nsid); } static inline void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid, uint64_t lba, uint32_t count) { - cmd->opc = rwcmd; - cmd->nsid = nsid; - cmd->cdw10 = lba & 0xffffffffu; - cmd->cdw11 = lba >> 32; - cmd->cdw12 = count-1; + cmd->opc_fuse = NVME_CMD_SET_OPC(rwcmd); + cmd->nsid = htole32(nsid); + cmd->cdw10 = htole32(lba & 0xffffffffu); + cmd->cdw11 = htole32(lba >> 32); + cmd->cdw12 = htole32(count-1); } static inline @@ -1120,14 +1090,160 @@ void nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid, uint32_t num_ranges) { - 
cmd->opc = NVME_OPC_DATASET_MANAGEMENT; - cmd->nsid = nsid; - cmd->cdw10 = num_ranges - 1; - cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT); + cmd->nsid = htole32(nsid); + cmd->cdw10 = htole32(num_ranges - 1); + cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE); } extern int nvme_use_nvd; #endif /* _KERNEL */ + +/* Endianness conversion functions for NVMe structs */ +static inline +void nvme_completion_swapbytes(struct nvme_completion *s) +{ + + s->cdw0 = le32toh(s->cdw0); + /* omit rsvd1 */ + s->sqhd = le16toh(s->sqhd); + s->sqid = le16toh(s->sqid); + /* omit cid */ + s->status = le16toh(s->status); +} + +static inline +void nvme_power_state_swapbytes(struct nvme_power_state *s) +{ + + s->mp = le16toh(s->mp); + s->enlat = le32toh(s->enlat); + s->exlat = le32toh(s->exlat); + s->idlp = le16toh(s->idlp); + s->actp = le16toh(s->actp); +} + +static inline +void nvme_controller_data_swapbytes(struct nvme_controller_data *s) +{ + int i; + + s->vid = le16toh(s->vid); + s->ssvid = le16toh(s->ssvid); + s->ctrlr_id = le16toh(s->ctrlr_id); + s->ver = le32toh(s->ver); + s->rtd3r = le32toh(s->rtd3r); + s->rtd3e = le32toh(s->rtd3e); + s->oaes = le32toh(s->oaes); + s->ctratt = le32toh(s->ctratt); + s->oacs = le16toh(s->oacs); + s->wctemp = le16toh(s->wctemp); + s->cctemp = le16toh(s->cctemp); + s->mtfa = le16toh(s->mtfa); + s->hmpre = le32toh(s->hmpre); + s->hmmin = le32toh(s->hmmin); + s->rpmbs = le32toh(s->rpmbs); + s->edstt = le16toh(s->edstt); + s->kas = le16toh(s->kas); + s->hctma = le16toh(s->hctma); + s->mntmt = le16toh(s->mntmt); + s->mxtmt = le16toh(s->mxtmt); + s->sanicap = le32toh(s->sanicap); + s->maxcmd = le16toh(s->maxcmd); + s->nn = le32toh(s->nn); + s->oncs = le16toh(s->oncs); + s->fuses = le16toh(s->fuses); + for (i = 0; i < 32; i++) + nvme_power_state_swapbytes(&s->power_state[i]); +} + +static inline +void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s) +{ + int i; + + s->nsze = le64toh(s->nsze);
+ s->ncap = le64toh(s->ncap); + s->nuse = le64toh(s->nuse); + for (i = 0; i < 16; i++) + s->lbaf[i] = le32toh(s->lbaf[i]); +} + +static inline +void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s) +{ + + s->error_count = le64toh(s->error_count); + s->sqid = le16toh(s->sqid); + s->cid = le16toh(s->cid); + s->status = le16toh(s->status); + s->error_location = le16toh(s->error_location); + s->lba = le64toh(s->lba); + s->nsid = le32toh(s->nsid); +} + +static inline +void nvme_le128toh(void *p) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + /* Swap 16 bytes in place */ + char *tmp = (char*)p; + char b; + int i; + for (i = 0; i < 8; i++) { + b = tmp[i]; + tmp[i] = tmp[15-i]; + tmp[15-i] = b; + } +#endif +} + +static inline +void nvme_health_information_page_swapbytes(struct nvme_health_information_page *s) +{ + int i; + + s->temperature = le16toh(s->temperature); + nvme_le128toh((void *)s->data_units_read); + nvme_le128toh((void *)s->data_units_written); + nvme_le128toh((void *)s->host_read_commands); + nvme_le128toh((void *)s->host_write_commands); + nvme_le128toh((void *)s->controller_busy_time); + nvme_le128toh((void *)s->power_cycles); + nvme_le128toh((void *)s->power_on_hours); + nvme_le128toh((void *)s->unsafe_shutdowns); + nvme_le128toh((void *)s->media_errors); + nvme_le128toh((void *)s->num_error_info_log_entries); + s->warning_temp_time = le32toh(s->warning_temp_time); + s->error_temp_time = le32toh(s->error_temp_time); + for (i = 0; i < 8; i++) + s->temp_sensor[i] = le16toh(s->temp_sensor[i]); +} + + +static inline +void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s) +{ + int i; + + for (i = 0; i < 7; i++) + s->revision[i] = le64toh(s->revision[i]); +} + +static inline +void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s) +{ + + s->current = le64toh(s->current); + s->overtemp_flag_last = le64toh(s->overtemp_flag_last); + s->overtemp_flag_life = le64toh(s->overtemp_flag_life); + s->max_temp = 
le64toh(s->max_temp); + s->min_temp = le64toh(s->min_temp); + /* omit _rsvd[] */ + s->max_oper_temp = le64toh(s->max_oper_temp); + s->min_oper_temp = le64toh(s->min_oper_temp); + s->est_offset = le64toh(s->est_offset); +} #endif /* __NVME_H__ */ Index: head/sys/dev/nvme/nvme.c =================================================================== --- head/sys/dev/nvme/nvme.c +++ head/sys/dev/nvme/nvme.c @@ -222,23 +222,38 @@ void nvme_dump_command(struct nvme_command *cmd) { + uint8_t opc, fuse; + + opc = (cmd->opc_fuse >> NVME_CMD_OPC_SHIFT) & NVME_CMD_OPC_MASK; + fuse = (cmd->opc_fuse >> NVME_CMD_FUSE_SHIFT) & NVME_CMD_FUSE_MASK; + printf( -"opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n", - cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid, +"opc:%x f:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n", + opc, fuse, cmd->cid, le32toh(cmd->nsid), cmd->rsvd2, cmd->rsvd3, - (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2, - cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14, - cmd->cdw15); + (uintmax_t)le64toh(cmd->mptr), (uintmax_t)le64toh(cmd->prp1), (uintmax_t)le64toh(cmd->prp2), + le32toh(cmd->cdw10), le32toh(cmd->cdw11), le32toh(cmd->cdw12), + le32toh(cmd->cdw13), le32toh(cmd->cdw14), le32toh(cmd->cdw15)); } void nvme_dump_completion(struct nvme_completion *cpl) { + uint8_t p, sc, sct, m, dnr; + uint16_t status; + + status = le16toh(cpl->status); + + p = NVME_STATUS_GET_P(status); + sc = NVME_STATUS_GET_SC(status); + sct = NVME_STATUS_GET_SCT(status); + m = NVME_STATUS_GET_M(status); + dnr = NVME_STATUS_GET_DNR(status); + printf("cdw0:%08x sqhd:%04x sqid:%04x " "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n", - cpl->cdw0, cpl->sqhd, cpl->sqid, - cpl->cid, cpl->status.p, cpl->status.sc, cpl->status.sct, - cpl->status.m, cpl->status.dnr); + le32toh(cpl->cdw0), le16toh(cpl->sqhd), le16toh(cpl->sqid), + cpl->cid, p, sc, sct, m, dnr); } static int Index: 
head/sys/dev/nvme/nvme_ctrlr.c =================================================================== --- head/sys/dev/nvme/nvme_ctrlr.c +++ head/sys/dev/nvme/nvme_ctrlr.c @@ -40,13 +40,14 @@ #include #include #include +#include #include #include #include "nvme_private.h" -#define B4_CHK_RDY_DELAY_MS 2300 /* work arond controller bug */ +#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */ static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, struct nvme_async_event_request *aer); @@ -123,7 +124,8 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) { struct nvme_qpair *qpair; - union cap_lo_register cap_lo; + uint32_t cap_lo; + uint16_t mqes; int i, error, num_entries, num_trackers; num_entries = NVME_IO_ENTRIES; @@ -134,8 +136,9 @@ * devices may specify a smaller limit, so we need to check * the MQES field in the capabilities register. */ - cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); - num_entries = min(num_entries, cap_lo.bits.mqes+1); + cap_lo = nvme_mmio_read_4(ctrlr, cap_lo); + mqes = (cap_lo >> NVME_CAP_LO_REG_MQES_SHIFT) & NVME_CAP_LO_REG_MQES_MASK; + num_entries = min(num_entries, mqes + 1); num_trackers = NVME_IO_TRACKERS; TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers); @@ -243,19 +246,19 @@ nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) { int ms_waited; - union csts_register csts; + uint32_t csts; - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); ms_waited = 0; - while (csts.bits.rdy != desired_val) { + while (((csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK) != desired_val) { if (ms_waited++ > ctrlr->ready_timeout_in_ms) { nvme_printf(ctrlr, "controller ready did not become %d " "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms); return (ENXIO); } DELAY(1000); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); } return (0); @@ -264,21 +267,25 @@ static int nvme_ctrlr_disable(struct 
nvme_controller *ctrlr) { - union cc_register cc; - union csts_register csts; + uint32_t cc; + uint32_t csts; + uint8_t en, rdy; int err; - cc.raw = nvme_mmio_read_4(ctrlr, cc); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + cc = nvme_mmio_read_4(ctrlr, cc); + csts = nvme_mmio_read_4(ctrlr, csts); + en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK; + rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK; + /* * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY * isn't the desired value. Short circuit if we're already disabled. */ - if (cc.bits.en == 1) { - if (csts.bits.rdy == 0) { + if (en == 1) { + if (rdy == 0) { /* EN == 1, wait for RDY == 1 or fail */ err = nvme_ctrlr_wait_for_ready(ctrlr, 1); if (err != 0) @@ -286,14 +293,14 @@ } } else { /* EN == 0 already wait for RDY == 0 */ - if (csts.bits.rdy == 0) + if (rdy == 0) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); } - cc.bits.en = 0; - nvme_mmio_write_4(ctrlr, cc, cc.raw); + cc &= ~NVME_CC_REG_EN_MASK; + nvme_mmio_write_4(ctrlr, cc, cc); /* * Some drives have issues with accessing the mmio after we * disable, so delay for a bit after we write the bit to @@ -307,19 +314,24 @@ static int nvme_ctrlr_enable(struct nvme_controller *ctrlr) { - union cc_register cc; - union csts_register csts; - union aqa_register aqa; - int err; + uint32_t cc; + uint32_t csts; + uint32_t aqa; + uint32_t qsize; + uint8_t en, rdy; + int err; - cc.raw = nvme_mmio_read_4(ctrlr, cc); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + cc = nvme_mmio_read_4(ctrlr, cc); + csts = nvme_mmio_read_4(ctrlr, csts); + en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK; + rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK; + /* * See note in nvme_ctrlr_disable. Short circuit if we're already enabled. 
*/ - if (cc.bits.en == 1) { - if (csts.bits.rdy == 1) + if (en == 1) { + if (rdy == 1) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); @@ -335,24 +347,28 @@ nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); DELAY(5000); - aqa.raw = 0; /* acqs and asqs are 0-based. */ - aqa.bits.acqs = ctrlr->adminq.num_entries-1; - aqa.bits.asqs = ctrlr->adminq.num_entries-1; - nvme_mmio_write_4(ctrlr, aqa, aqa.raw); + qsize = ctrlr->adminq.num_entries - 1; + + aqa = 0; + aqa = (qsize & NVME_AQA_REG_ACQS_MASK) << NVME_AQA_REG_ACQS_SHIFT; + aqa |= (qsize & NVME_AQA_REG_ASQS_MASK) << NVME_AQA_REG_ASQS_SHIFT; + nvme_mmio_write_4(ctrlr, aqa, aqa); DELAY(5000); - cc.bits.en = 1; - cc.bits.css = 0; - cc.bits.ams = 0; - cc.bits.shn = 0; - cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ - cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ + /* Initialization values for CC */ + cc = 0; + cc |= 1 << NVME_CC_REG_EN_SHIFT; + cc |= 0 << NVME_CC_REG_CSS_SHIFT; + cc |= 0 << NVME_CC_REG_AMS_SHIFT; + cc |= 0 << NVME_CC_REG_SHN_SHIFT; + cc |= 6 << NVME_CC_REG_IOSQES_SHIFT; /* SQ entry size == 64 == 2^6 */ + cc |= 4 << NVME_CC_REG_IOCQES_SHIFT; /* CQ entry size == 16 == 2^4 */ /* This evaluates to 0, which is according to spec. */ - cc.bits.mps = (PAGE_SIZE >> 13); + cc |= (PAGE_SIZE >> 13) << NVME_CC_REG_MPS_SHIFT; - nvme_mmio_write_4(ctrlr, cc, cc.raw); + nvme_mmio_write_4(ctrlr, cc, cc); return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); } @@ -414,6 +430,9 @@ return (ENXIO); } + /* Convert data to host endian */ + nvme_controller_data_swapbytes(&ctrlr->cdata); + /* * Use MDTS to ensure our default max_xfer_size doesn't exceed what the * controller supports. 
@@ -531,8 +550,7 @@ case NVME_LOG_ERROR: log_page_size = min( sizeof(struct nvme_error_information_entry) * - ctrlr->cdata.elpe, - NVME_MAX_AER_LOG_SIZE); + (ctrlr->cdata.elpe + 1), NVME_MAX_AER_LOG_SIZE); break; case NVME_LOG_HEALTH_INFORMATION: log_page_size = sizeof(struct nvme_health_information_page); @@ -550,27 +568,27 @@ static void nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr, - union nvme_critical_warning_state state) + uint8_t state) { - if (state.bits.available_spare == 1) + if (state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE) nvme_printf(ctrlr, "available spare space below threshold\n"); - if (state.bits.temperature == 1) + if (state & NVME_CRIT_WARN_ST_TEMPERATURE) nvme_printf(ctrlr, "temperature above threshold\n"); - if (state.bits.device_reliability == 1) + if (state & NVME_CRIT_WARN_ST_DEVICE_RELIABILITY) nvme_printf(ctrlr, "device reliability degraded\n"); - if (state.bits.read_only == 1) + if (state & NVME_CRIT_WARN_ST_READ_ONLY) nvme_printf(ctrlr, "media placed in read only mode\n"); - if (state.bits.volatile_memory_backup == 1) + if (state & NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP) nvme_printf(ctrlr, "volatile memory backup device failed\n"); - if (state.bits.reserved != 0) + if (state & NVME_CRIT_WARN_ST_RESERVED_MASK) nvme_printf(ctrlr, - "unknown critical warning(s): state = 0x%02x\n", state.raw); + "unknown critical warning(s): state = 0x%02x\n", state); } static void @@ -578,6 +596,8 @@ { struct nvme_async_event_request *aer = arg; struct nvme_health_information_page *health_info; + struct nvme_error_information_entry *err; + int i; /* * If the log page fetch for some reason completed with an error, @@ -588,6 +608,29 @@ nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, aer->log_page_id, NULL, 0); else { + /* Convert data to host endian */ + switch (aer->log_page_id) { + case NVME_LOG_ERROR: + err = (struct nvme_error_information_entry *)aer->log_page_buffer; + for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++) + 
nvme_error_information_entry_swapbytes(err++); + break; + case NVME_LOG_HEALTH_INFORMATION: + nvme_health_information_page_swapbytes( + (struct nvme_health_information_page *)aer->log_page_buffer); + break; + case NVME_LOG_FIRMWARE_SLOT: + nvme_firmware_page_swapbytes( + (struct nvme_firmware_page *)aer->log_page_buffer); + break; + case INTEL_LOG_TEMP_STATS: + intel_log_temp_stats_swapbytes( + (struct intel_log_temp_stats *)aer->log_page_buffer); + break; + default: + break; + } + if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) { health_info = (struct nvme_health_information_page *) aer->log_page_buffer; @@ -600,8 +643,8 @@ * config so that we do not receive repeated * notifications for the same event. */ - aer->ctrlr->async_event_config.raw &= - ~health_info->critical_warning.raw; + aer->ctrlr->async_event_config &= + ~health_info->critical_warning; nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr, aer->ctrlr->async_event_config, NULL, NULL); } @@ -679,7 +722,7 @@ * nature never be timed out. 
*/ req->timeout = FALSE; - req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST; + req->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_ASYNC_EVENT_REQUEST); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -690,8 +733,8 @@ struct nvme_async_event_request *aer; uint32_t i; - ctrlr->async_event_config.raw = 0xFF; - ctrlr->async_event_config.bits.reserved = 0; + ctrlr->async_event_config = 0xFF; + ctrlr->async_event_config &= ~NVME_CRIT_WARN_ST_RESERVED_MASK; status.done = 0; nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD, @@ -702,7 +745,7 @@ (status.cpl.cdw0 & 0xFFFF) == 0xFFFF || (status.cpl.cdw0 & 0xFFFF) == 0x0000) { nvme_printf(ctrlr, "temperature threshold not supported\n"); - ctrlr->async_event_config.bits.temperature = 0; + ctrlr->async_event_config &= ~NVME_CRIT_WARN_ST_TEMPERATURE; } nvme_ctrlr_cmd_set_async_event_config(ctrlr, @@ -907,12 +950,15 @@ nvme_pt_done(void *arg, const struct nvme_completion *cpl) { struct nvme_pt_command *pt = arg; + uint16_t status; bzero(&pt->cpl, sizeof(pt->cpl)); pt->cpl.cdw0 = cpl->cdw0; - pt->cpl.status = cpl->status; - pt->cpl.status.p = 0; + status = cpl->status; + status &= ~NVME_STATUS_P_MASK; + pt->cpl.status = status; + mtx_lock(pt->driver_lock); wakeup(pt); mtx_unlock(pt->driver_lock); @@ -973,20 +1019,24 @@ } else req = nvme_allocate_request_null(nvme_pt_done, pt); - req->cmd.opc = pt->cmd.opc; - req->cmd.cdw10 = pt->cmd.cdw10; - req->cmd.cdw11 = pt->cmd.cdw11; - req->cmd.cdw12 = pt->cmd.cdw12; - req->cmd.cdw13 = pt->cmd.cdw13; - req->cmd.cdw14 = pt->cmd.cdw14; - req->cmd.cdw15 = pt->cmd.cdw15; + /* Assume userspace already converted to little-endian */ + req->cmd.opc_fuse = pt->cmd.opc_fuse; + req->cmd.cdw10 = pt->cmd.cdw10; + req->cmd.cdw11 = pt->cmd.cdw11; + req->cmd.cdw12 = pt->cmd.cdw12; + req->cmd.cdw13 = pt->cmd.cdw13; + req->cmd.cdw14 = pt->cmd.cdw14; + req->cmd.cdw15 = pt->cmd.cdw15; - req->cmd.nsid = nsid; + req->cmd.nsid = htole32(nsid); if (is_admin_cmd) mtx = &ctrlr->lock; - else + else { + 
KASSERT((nsid-1) >= 0 && (nsid-1) < NVME_MAX_NAMESPACES, + ("%s: invalid namespace ID %d\n", __func__, nsid)); mtx = &ctrlr->ns[nsid-1].lock; + } mtx_lock(mtx); pt->driver_lock = mtx; @@ -1025,7 +1075,7 @@ break; case NVME_PASSTHROUGH_CMD: pt = (struct nvme_pt_command *)arg; - return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, pt->cmd.nsid, + return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, le32toh(pt->cmd.nsid), 1 /* is_user_buffer */, 1 /* is_admin_cmd */)); default: return (ENOTTY); @@ -1125,9 +1175,12 @@ int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) { - union cap_lo_register cap_lo; - union cap_hi_register cap_hi; - int status, timeout_period; + uint32_t cap_lo; + uint32_t cap_hi; + uint8_t to; + uint8_t dstrd; + uint8_t mpsmin; + int status, timeout_period; ctrlr->dev = dev; @@ -1142,15 +1195,18 @@ * Software emulators may set the doorbell stride to something * other than zero, but this driver is not set up to handle that. */ - cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi); - if (cap_hi.bits.dstrd != 0) + cap_hi = nvme_mmio_read_4(ctrlr, cap_hi); + dstrd = (cap_hi >> NVME_CAP_HI_REG_DSTRD_SHIFT) & NVME_CAP_HI_REG_DSTRD_MASK; + if (dstrd != 0) return (ENXIO); - ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin); + mpsmin = (cap_hi >> NVME_CAP_HI_REG_MPSMIN_SHIFT) & NVME_CAP_HI_REG_MPSMIN_MASK; + ctrlr->min_page_size = 1 << (12 + mpsmin); /* Get ready timeout value from controller, in units of 500ms. 
*/ - cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); - ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; + cap_lo = nvme_mmio_read_4(ctrlr, cap_lo); + to = (cap_lo >> NVME_CAP_LO_REG_TO_SHIFT) & NVME_CAP_LO_REG_TO_MASK; + ctrlr->ready_timeout_in_ms = to * 500; timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD; TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period); @@ -1249,19 +1305,21 @@ void nvme_ctrlr_shutdown(struct nvme_controller *ctrlr) { - union cc_register cc; - union csts_register csts; - int ticks = 0; + uint32_t cc; + uint32_t csts; + int ticks = 0; - cc.raw = nvme_mmio_read_4(ctrlr, cc); - cc.bits.shn = NVME_SHN_NORMAL; - nvme_mmio_write_4(ctrlr, cc, cc.raw); - csts.raw = nvme_mmio_read_4(ctrlr, csts); - while ((csts.bits.shst != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) { + cc = nvme_mmio_read_4(ctrlr, cc); + cc &= ~(NVME_CC_REG_SHN_MASK << NVME_CC_REG_SHN_SHIFT); + cc |= NVME_SHN_NORMAL << NVME_CC_REG_SHN_SHIFT; + nvme_mmio_write_4(ctrlr, cc, cc); + + csts = nvme_mmio_read_4(ctrlr, csts); + while ((NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) { pause("nvme shn", 1); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); } - if (csts.bits.shst != NVME_SHST_COMPLETE) + if (NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) nvme_printf(ctrlr, "did not complete shutdown within 5 seconds " "of notification\n"); } Index: head/sys/dev/nvme/nvme_ctrlr_cmd.c =================================================================== --- head/sys/dev/nvme/nvme_ctrlr_cmd.c +++ head/sys/dev/nvme/nvme_ctrlr_cmd.c @@ -42,13 +42,13 @@ sizeof(struct nvme_controller_data), cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_IDENTIFY; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY); /* * TODO: create an identify command data structure, which * includes this CNS bit in cdw10. 
*/ - cmd->cdw10 = 1; + cmd->cdw10 = htole32(1); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -64,12 +64,12 @@ sizeof(struct nvme_namespace_data), cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_IDENTIFY; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY); /* * TODO: create an identify command data structure */ - cmd->nsid = nsid; + cmd->nsid = htole32(nsid); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -85,16 +85,16 @@ req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_CREATE_IO_CQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_CREATE_IO_CQ); /* * TODO: create a create io completion queue command data * structure. */ - cmd->cdw10 = ((io_que->num_entries-1) << 16) | io_que->id; + cmd->cdw10 = htole32(((io_que->num_entries-1) << 16) | io_que->id); /* 0x3 = interrupts enabled | physically contiguous */ - cmd->cdw11 = (vector << 16) | 0x3; - cmd->prp1 = io_que->cpl_bus_addr; + cmd->cdw11 = htole32((vector << 16) | 0x3); + cmd->prp1 = htole64(io_que->cpl_bus_addr); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -109,16 +109,16 @@ req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_CREATE_IO_SQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_CREATE_IO_SQ); /* * TODO: create a create io submission queue command data * structure. */ - cmd->cdw10 = ((io_que->num_entries-1) << 16) | io_que->id; + cmd->cdw10 = htole32(((io_que->num_entries-1) << 16) | io_que->id); /* 0x1 = physically contiguous */ - cmd->cdw11 = (io_que->id << 16) | 0x1; - cmd->prp1 = io_que->cmd_bus_addr; + cmd->cdw11 = htole32((io_que->id << 16) | 0x1); + cmd->prp1 = htole64(io_que->cmd_bus_addr); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -133,13 +133,13 @@ req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_DELETE_IO_CQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DELETE_IO_CQ); /* * TODO: create a delete io completion queue command data * structure. 
*/ - cmd->cdw10 = io_que->id; + cmd->cdw10 = htole32(io_que->id); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -154,13 +154,13 @@ req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_DELETE_IO_SQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DELETE_IO_SQ); /* * TODO: create a delete io submission queue command data * structure. */ - cmd->cdw10 = io_que->id; + cmd->cdw10 = htole32(io_que->id); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -176,9 +176,9 @@ req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_SET_FEATURES; - cmd->cdw10 = feature; - cmd->cdw11 = cdw11; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_SET_FEATURES); + cmd->cdw10 = htole32(feature); + cmd->cdw11 = htole32(cdw11); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -194,9 +194,9 @@ req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_GET_FEATURES; - cmd->cdw10 = feature; - cmd->cdw11 = cdw11; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_FEATURES); + cmd->cdw10 = htole32(feature); + cmd->cdw11 = htole32(cdw11); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -214,12 +214,11 @@ void nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr, - union nvme_critical_warning_state state, nvme_cb_fn_t cb_fn, - void *cb_arg) + uint8_t state, nvme_cb_fn_t cb_fn, void *cb_arg) { uint32_t cdw11; - cdw11 = state.raw; + cdw11 = state; nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, NULL, 0, cb_fn, cb_arg); @@ -261,10 +260,11 @@ req = nvme_allocate_request_vaddr(payload, payload_size, cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_GET_LOG_PAGE; - cmd->nsid = nsid; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_LOG_PAGE); + cmd->nsid = htole32(nsid); cmd->cdw10 = ((payload_size/sizeof(uint32_t)) - 1) << 16; cmd->cdw10 |= log_page; + cmd->cdw10 = htole32(cmd->cdw10); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -320,8 +320,8 @@ req = 
nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_ABORT; - cmd->cdw10 = (cid << 16) | sqid; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_ABORT); + cmd->cdw10 = htole32((cid << 16) | sqid); nvme_ctrlr_submit_admin_request(ctrlr, req); } Index: head/sys/dev/nvme/nvme_ns.c =================================================================== --- head/sys/dev/nvme/nvme_ns.c +++ head/sys/dev/nvme/nvme_ns.c @@ -172,7 +172,14 @@ uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns) { - return (1 << ns->data.lbaf[ns->data.flbas.format].lbads); + uint8_t flbas_fmt, lbads; + + flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; + lbads = (ns->data.lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & + NVME_NS_DATA_LBAF_LBADS_MASK; + + return (1 << lbads); } uint64_t @@ -265,8 +272,10 @@ inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1; if (inbed == children) { bzero(&parent_cpl, sizeof(parent_cpl)); - if (parent->bio_flags & BIO_ERROR) - parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR; + if (parent->bio_flags & BIO_ERROR) { + parent_cpl.status &= ~(NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT); + parent_cpl.status |= (NVME_SC_DATA_TRANSFER_ERROR) << NVME_STATUS_SC_SHIFT; + } nvme_ns_bio_done(parent, &parent_cpl); } } @@ -459,10 +468,14 @@ dsm_range = malloc(sizeof(struct nvme_dsm_range), M_NVME, M_ZERO | M_WAITOK); + if (!dsm_range) { + err = ENOMEM; + break; + } dsm_range->length = - bp->bio_bcount/nvme_ns_get_sector_size(ns); + htole32(bp->bio_bcount/nvme_ns_get_sector_size(ns)); dsm_range->starting_lba = - bp->bio_offset/nvme_ns_get_sector_size(ns); + htole64(bp->bio_offset/nvme_ns_get_sector_size(ns)); bp->bio_driver2 = dsm_range; err = nvme_ns_cmd_deallocate(ns, dsm_range, 1, nvme_ns_bio_done, bp); @@ -483,6 +496,10 @@ { struct nvme_completion_poll_status status; int unit; + uint16_t oncs; + uint8_t dsm; + uint8_t flbas_fmt; + uint8_t vwc_present; ns->ctrlr = ctrlr; ns->id = id; @@ 
-513,6 +530,9 @@ return (ENXIO); } + /* Convert data to host endian */ + nvme_namespace_data_swapbytes(&ns->data); + /* * If the size of is zero, chances are this isn't a valid * namespace (eg one that's not been configured yet). The @@ -522,20 +542,26 @@ if (ns->data.nsze == 0) return (ENXIO); + flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; /* * Note: format is a 0-based value, so > is appropriate here, * not >=. */ - if (ns->data.flbas.format > ns->data.nlbaf) { + if (flbas_fmt > ns->data.nlbaf) { printf("lba format %d exceeds number supported (%d)\n", - ns->data.flbas.format, ns->data.nlbaf+1); + flbas_fmt, ns->data.nlbaf + 1); return (ENXIO); } - if (ctrlr->cdata.oncs.dsm) + oncs = ctrlr->cdata.oncs; + dsm = (oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) & NVME_CTRLR_DATA_ONCS_DSM_MASK; + if (dsm) ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED; - if (ctrlr->cdata.vwc.present) + vwc_present = (ctrlr->cdata.vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) & + NVME_CTRLR_DATA_VWC_PRESENT_MASK; + if (vwc_present) ns->flags |= NVME_NS_FLUSH_SUPPORTED; /* Index: head/sys/dev/nvme/nvme_ns_cmd.c =================================================================== --- head/sys/dev/nvme/nvme_ns_cmd.c +++ head/sys/dev/nvme/nvme_ns_cmd.c @@ -126,12 +126,12 @@ return (ENOMEM); cmd = &req->cmd; - cmd->opc = NVME_OPC_DATASET_MANAGEMENT; - cmd->nsid = ns->id; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT); + cmd->nsid = htole32(ns->id); /* TODO: create a delete command data structure */ - cmd->cdw10 = num_ranges - 1; - cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE; + cmd->cdw10 = htole32(num_ranges - 1); + cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE); nvme_ctrlr_submit_io_request(ns->ctrlr, req); Index: head/sys/dev/nvme/nvme_private.h =================================================================== --- head/sys/dev/nvme/nvme_private.h +++ head/sys/dev/nvme/nvme_private.h @@ -312,8 +312,8 @@ struct cdev *cdev; - /** bit mask of 
warning types currently enabled for async events */ - union nvme_critical_warning_state async_event_config; + /** bit mask of critical warning types currently enabled for async events */ + uint8_t async_event_config; uint32_t num_aers; struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS]; @@ -339,13 +339,13 @@ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val) -#define nvme_mmio_write_8(sc, reg, val) \ +#define nvme_mmio_write_8(sc, reg, val) \ do { \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val & 0xFFFFFFFF); \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg)+4, \ - (val & 0xFFFFFFFF00000000UL) >> 32); \ + (val & 0xFFFFFFFF00000000ULL) >> 32); \ } while (0); #if __FreeBSD_version < 800054 @@ -399,7 +399,7 @@ uint32_t num_queues, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr, - union nvme_critical_warning_state state, + uint8_t state, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid, uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg); Index: head/sys/dev/nvme/nvme_qpair.c =================================================================== --- head/sys/dev/nvme/nvme_qpair.c +++ head/sys/dev/nvme/nvme_qpair.c @@ -110,40 +110,42 @@ nvme_admin_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { + uint16_t opc; + opc = le16toh(cmd->opc_fuse) & NVME_CMD_OPC_MASK; nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%x " "cdw10:%08x cdw11:%08x\n", - get_admin_opcode_string(cmd->opc), cmd->opc, qpair->id, cmd->cid, - cmd->nsid, cmd->cdw10, cmd->cdw11); + get_admin_opcode_string(opc), opc, qpair->id, cmd->cid, + le32toh(cmd->nsid), le32toh(cmd->cdw10), le32toh(cmd->cdw11)); } static void nvme_io_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { + uint16_t opc; - switch (cmd->opc) { + opc = 
le16toh(cmd->opc_fuse) & NVME_CMD_OPC_MASK; + switch (opc) { case NVME_OPC_WRITE: case NVME_OPC_READ: case NVME_OPC_WRITE_UNCORRECTABLE: case NVME_OPC_COMPARE: nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d " "lba:%llu len:%d\n", - get_io_opcode_string(cmd->opc), qpair->id, cmd->cid, - cmd->nsid, - ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10, - (cmd->cdw12 & 0xFFFF) + 1); + get_io_opcode_string(opc), qpair->id, cmd->cid, le32toh(cmd->nsid), + ((unsigned long long)le32toh(cmd->cdw11) << 32) + le32toh(cmd->cdw10), + (le32toh(cmd->cdw12) & 0xFFFF) + 1); break; case NVME_OPC_FLUSH: case NVME_OPC_DATASET_MANAGEMENT: nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n", - get_io_opcode_string(cmd->opc), qpair->id, cmd->cid, - cmd->nsid); + get_io_opcode_string(opc), qpair->id, cmd->cid, le32toh(cmd->nsid)); break; default: nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n", - get_io_opcode_string(cmd->opc), cmd->opc, qpair->id, - cmd->cid, cmd->nsid); + get_io_opcode_string(opc), opc, qpair->id, + cmd->cid, le32toh(cmd->nsid)); break; } } @@ -245,26 +247,37 @@ nvme_qpair_print_completion(struct nvme_qpair *qpair, struct nvme_completion *cpl) { + uint16_t sct, sc; + + sct = NVME_STATUS_GET_SCT(cpl->status); + sc = NVME_STATUS_GET_SC(cpl->status); + nvme_printf(qpair->ctrlr, "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x\n", - get_status_string(cpl->status.sct, cpl->status.sc), - cpl->status.sct, cpl->status.sc, cpl->sqid, cpl->cid, cpl->cdw0); + get_status_string(sct, sc), sct, sc, cpl->sqid, cpl->cid, + cpl->cdw0); } static boolean_t nvme_completion_is_retry(const struct nvme_completion *cpl) { + uint8_t sct, sc, dnr; + + sct = NVME_STATUS_GET_SCT(cpl->status); + sc = NVME_STATUS_GET_SC(cpl->status); + dnr = NVME_STATUS_GET_DNR(cpl->status); + /* * TODO: spec is not clear how commands that are aborted due * to TLER will be marked. So for now, it seems * NAMESPACE_NOT_READY is the only case where we should * look at the DNR bit. 
*/ - switch (cpl->status.sct) { + switch (sct) { case NVME_SCT_GENERIC: - switch (cpl->status.sc) { + switch (sc) { case NVME_SC_ABORTED_BY_REQUEST: case NVME_SC_NAMESPACE_NOT_READY: - if (cpl->status.dnr) + if (dnr) return (0); else return (1); @@ -359,9 +372,9 @@ memset(&cpl, 0, sizeof(cpl)); cpl.sqid = qpair->id; cpl.cid = tr->cid; - cpl.status.sct = sct; - cpl.status.sc = sc; - cpl.status.dnr = dnr; + cpl.status |= (sct & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT; + cpl.status |= (sc & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; + cpl.status |= (dnr & NVME_STATUS_DNR_MASK) << NVME_STATUS_DNR_SHIFT; nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); } @@ -375,8 +388,8 @@ memset(&cpl, 0, sizeof(cpl)); cpl.sqid = qpair->id; - cpl.status.sct = sct; - cpl.status.sc = sc; + cpl.status |= (sct & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT; + cpl.status |= (sc & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; error = nvme_completion_is_error(&cpl); @@ -395,7 +408,7 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair) { struct nvme_tracker *tr; - struct nvme_completion *cpl; + struct nvme_completion cpl; qpair->num_intr_handler_calls++; @@ -409,20 +422,24 @@ return; while (1) { - cpl = &qpair->cpl[qpair->cq_head]; + cpl = qpair->cpl[qpair->cq_head]; - if (cpl->status.p != qpair->phase) + /* Convert to host endian */ + nvme_completion_swapbytes(&cpl); + + if (NVME_STATUS_GET_P(cpl.status) != qpair->phase) break; - tr = qpair->act_tr[cpl->cid]; + tr = qpair->act_tr[cpl.cid]; if (tr != NULL) { - nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE); - qpair->sq_head = cpl->sqhd; + nvme_qpair_complete_tracker(qpair, tr, &cpl, TRUE); + qpair->sq_head = cpl.sqhd; } else { nvme_printf(qpair->ctrlr, "cpl does not map to outstanding cmd\n"); - nvme_dump_completion(cpl); + /* nvme_dump_completion expects device endianness */ + nvme_dump_completion(&qpair->cpl[qpair->cq_head]); KASSERT(0, ("received completion for unknown cmd\n")); } @@ -629,7 +646,7 @@ tr = 
TAILQ_FIRST(&qpair->outstanding_tr); while (tr != NULL) { - if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) { + if ((le16toh(tr->req->cmd.opc_fuse) & NVME_CMD_OPC_MASK) == NVME_OPC_ASYNC_EVENT_REQUEST) { nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0, FALSE); @@ -685,12 +702,14 @@ struct nvme_tracker *tr = arg; struct nvme_qpair *qpair = tr->qpair; struct nvme_controller *ctrlr = qpair->ctrlr; - union csts_register csts; + uint32_t csts; + uint8_t cfs; /* Read csts to get value of cfs - controller fatal status. */ - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); - if (ctrlr->enable_aborts && csts.bits.cfs == 0) { + cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK; + if (ctrlr->enable_aborts && cfs == 0) { /* * If aborts are enabled, only use them if the controller is * not reporting fatal status. @@ -759,16 +778,16 @@ * we can safely just transfer each segment to its * associated PRP entry. */ - tr->req->cmd.prp1 = seg[0].ds_addr; + tr->req->cmd.prp1 = htole64(seg[0].ds_addr); if (nseg == 2) { - tr->req->cmd.prp2 = seg[1].ds_addr; + tr->req->cmd.prp2 = htole64(seg[1].ds_addr); } else if (nseg > 2) { cur_nseg = 1; - tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr; + tr->req->cmd.prp2 = htole64((uint64_t)tr->prp_bus_addr); while (cur_nseg < nseg) { tr->prp[cur_nseg-1] = - (uint64_t)seg[cur_nseg].ds_addr; + htole64((uint64_t)seg[cur_nseg].ds_addr); cur_nseg++; } } else {