Page MenuHomeFreeBSD

D5777.id.diff
No OneTemporary

D5777.id.diff

Index: head/sys/contrib/rdma/krping/krping.c
===================================================================
--- head/sys/contrib/rdma/krping/krping.c
+++ head/sys/contrib/rdma/krping/krping.c
@@ -56,6 +56,7 @@
extern int krping_debug;
#define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x)
#define PRINTF(cb, x...) krping_printf((cb)->cookie, x)
+#define BIND_INFO 1
MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("RDMA ping client/server");
@@ -99,7 +100,7 @@
{"poll", OPT_NOPARAM, 'P'},
{"local_dma_lkey", OPT_NOPARAM, 'Z'},
{"read_inv", OPT_NOPARAM, 'R'},
- {"fr", OPT_NOPARAM, 'f'},
+ {"fr", OPT_INT, 'f'},
{NULL, 0, 0}
};
@@ -232,6 +233,7 @@
int txdepth; /* SQ depth */
int local_dma_lkey; /* use 0 for lkey */
int frtest; /* fastreg test */
+ int testnum;
/* CM stuff */
struct rdma_cm_id *cm_id; /* connection on client side,*/
@@ -365,11 +367,7 @@
PRINTF(cb, "cq completion in ERROR state\n");
return;
}
- if (cb->frtest) {
- PRINTF(cb, "cq completion event in frtest!\n");
- return;
- }
- if (!cb->wlat && !cb->rlat && !cb->bw)
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest)
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
if (wc.status) {
@@ -411,7 +409,7 @@
DEBUG_LOG(cb, "recv completion\n");
cb->stats.recv_bytes += sizeof(cb->recv_buf);
cb->stats.recv_msgs++;
- if (cb->wlat || cb->rlat || cb->bw)
+ if (cb->wlat || cb->rlat || cb->bw || cb->frtest)
ret = server_recv(cb, &wc);
else
ret = cb->server ? server_recv(cb, &wc) :
@@ -464,7 +462,7 @@
return ret;
}
- if (!cb->wlat && !cb->rlat && !cb->bw) {
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
if (cb->state == ERROR) {
PRINTF(cb, "wait for CONNECTED state %d\n",
@@ -502,7 +500,7 @@
cb->sq_wr.sg_list = &cb->send_sgl;
cb->sq_wr.num_sge = 1;
- if (cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
cb->rdma_sgl.addr = cb->rdma_dma_addr;
if (cb->mem == MR)
cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
@@ -531,7 +529,11 @@
case MW:
cb->bind_attr.wr_id = 0xabbaabba;
cb->bind_attr.send_flags = 0; /* unsignaled */
+#ifdef BIND_INFO
cb->bind_attr.bind_info.length = cb->size;
+#else
+ cb->bind_attr.length = cb->size;
+#endif
break;
default:
break;
@@ -646,7 +648,7 @@
buf.size = cb->size;
iovbase = cb->rdma_dma_addr;
cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
- IB_ACCESS_LOCAL_WRITE|
+ IB_ACCESS_LOCAL_WRITE|
IB_ACCESS_REMOTE_READ|
IB_ACCESS_REMOTE_WRITE,
&iovbase);
@@ -665,7 +667,7 @@
}
}
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
cb->start_buf = kmalloc(cb->size, GFP_KERNEL);
if (!cb->start_buf) {
@@ -682,9 +684,9 @@
if (cb->mem == MR || cb->mem == MW) {
unsigned flags = IB_ACCESS_REMOTE_READ;
- if (cb->wlat || cb->rlat || cb->bw) {
+ if (cb->wlat || cb->rlat || cb->bw || cb->frtest) {
flags |= IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE;
+ IB_ACCESS_REMOTE_WRITE;
}
buf.addr = cb->start_dma_addr;
@@ -907,15 +909,33 @@
* Update the MW with new buf info.
*/
if (buf == (u64)cb->start_dma_addr) {
+#ifdef BIND_INFO
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
cb->bind_attr.bind_info.mr = cb->start_mr;
+#else
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
+ cb->bind_attr.mr = cb->start_mr;
+#endif
} else {
+#ifdef BIND_INFO
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
cb->bind_attr.bind_info.mr = cb->rdma_mr;
+#else
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+ cb->bind_attr.mr = cb->rdma_mr;
+#endif
}
+#ifdef BIND_INFO
cb->bind_attr.bind_info.addr = buf;
+#else
+ cb->bind_attr.addr = buf;
+#endif
DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
+#ifdef BIND_INFO
cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
+#else
+ cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+#endif
ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
if (ret) {
PRINTF(cb, "bind mw error %d\n", ret);
@@ -950,7 +970,7 @@
* advertising the rdma buffer. Server side
* sends have no data.
*/
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
info->buf = htonll(buf);
info->rkey = htonl(rkey);
@@ -980,7 +1000,6 @@
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
cb->rdma_sq_wr.sg_list->length = cb->remote_len;
cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
- cb->rdma_sq_wr.next = NULL;
/* Issue RDMA Read. */
if (cb->read_inv)
@@ -1484,7 +1503,6 @@
PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
-
wait_event_interruptible(cb->sem, cb->state == ERROR);
}
@@ -1557,9 +1575,10 @@
wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static int fastreg_supported(struct krping_cb *cb)
+static int fastreg_supported(struct krping_cb *cb, int server)
{
- struct ib_device *dev = cb->child_cm_id->device;
+ struct ib_device *dev = server?cb->child_cm_id->device:
+ cb->cm_id->device;
struct ib_device_attr attr;
int ret;
@@ -1610,158 +1629,259 @@
return -1;
}
- if (cb->mem == FASTREG && !fastreg_supported(cb))
+ if (cb->mem == FASTREG && !fastreg_supported(cb, 1))
return -EINVAL;
return 0;
}
-static void krping_run_server(struct krping_cb *cb)
+/*
+ * sq-depth worth of fastreg + 0B read-inv pairs, reposting them as the reads
+ * complete.
+ * NOTE: every 9 seconds we sleep for 1 second to keep the kernel happy.
+ */
+static void krping_fr_test5(struct krping_cb *cb)
{
- struct ib_recv_wr *bad_wr;
+ struct ib_fast_reg_page_list **pl;
+ struct ib_send_wr *fr, *read, *bad;
+ struct ib_wc wc;
+ struct ib_sge *sgl;
+ u8 key = 0;
+ struct ib_mr **mr;
+ u8 **buf;
+ dma_addr_t *dma_addr;
+ int i;
int ret;
+ int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+ time_t start;
+ int count = 0;
+ int scnt;
+ int depth = cb->txdepth >> 1;
- ret = krping_bind_server(cb);
- if (ret)
+ if (!depth) {
+ PRINTF(cb, "txdepth must be > 1 for this test!\n");
return;
-
- ret = krping_setup_qp(cb, cb->child_cm_id);
- if (ret) {
- PRINTF(cb, "setup_qp failed: %d\n", ret);
- goto err0;
}
- ret = krping_setup_buffers(cb);
- if (ret) {
- PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
+ pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+ mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+ fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+ sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+ read = kzalloc(sizeof *read * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s read %p size %lu\n", __func__, read, sizeof *read * depth);
+ buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+ dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+ DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+ if (!pl || !mr || !fr || !read || !sgl || !buf || !dma_addr) {
+ PRINTF(cb, "kzalloc failed\n");
goto err1;
}
- ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
- if (ret) {
- PRINTF(cb, "ib_post_recv failed: %d\n", ret);
- goto err2;
- }
-
- ret = krping_accept(cb);
- if (ret) {
- PRINTF(cb, "connect error %d\n", ret);
- goto err2;
- }
-
- if (cb->wlat)
- krping_wlat_test_server(cb);
- else if (cb->rlat)
- krping_rlat_test_server(cb);
- else if (cb->bw)
- krping_bw_test_server(cb);
- else
- krping_test_server(cb);
- rdma_disconnect(cb->child_cm_id);
-err2:
- krping_free_buffers(cb);
-err1:
- krping_free_qp(cb);
-err0:
- rdma_destroy_id(cb->child_cm_id);
-}
-
-static void krping_test_client(struct krping_cb *cb)
-{
- int ping, start, cc, i, ret;
- struct ib_send_wr *bad_wr;
- unsigned char c;
-
- start = 65;
- for (ping = 0; !cb->count || ping < cb->count; ping++) {
- cb->state = RDMA_READ_ADV;
-
- /* Put some ascii text in the buffer. */
- cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
- for (i = cc, c = start; i < cb->size; i++) {
- cb->start_buf[i] = c;
- c++;
- if (c > 122)
- c = 65;
+ for (scnt = 0; scnt < depth; scnt++) {
+ pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+ if (IS_ERR(pl[scnt])) {
+ PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+ PTR_ERR(pl[scnt]));
+ goto err2;
}
- start++;
- if (start > 122)
- start = 65;
- cb->start_buf[cb->size - 1] = 0;
+ DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
- krping_format_send(cb, cb->start_dma_addr);
- if (cb->state == ERROR) {
- PRINTF(cb, "krping_format_send failed\n");
- break;
- }
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
- if (ret) {
- PRINTF(cb, "post send error %d\n", ret);
- break;
+ mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+ if (IS_ERR(mr[scnt])) {
+ PRINTF(cb, "alloc_fr failed %ld\n",
+ PTR_ERR(mr[scnt]));
+ goto err2;
}
+ DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+ ib_update_fast_reg_key(mr[scnt], ++key);
- /* Wait for server to ACK */
- wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
- if (cb->state != RDMA_WRITE_ADV) {
- PRINTF(cb,
- "wait for RDMA_WRITE_ADV state %d\n",
- cb->state);
- break;
+ buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+ if (!buf[scnt]) {
+ PRINTF(cb, "kmalloc failed\n");
+ ret = -ENOMEM;
+ goto err2;
}
-
- krping_format_send(cb, cb->rdma_dma_addr);
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+ DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+ dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+ buf[scnt], cb->size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(cb->pd->device->dma_device,
+ dma_addr[scnt])) {
+ PRINTF(cb, "dma_map failed\n");
+ ret = -ENOMEM;
+ goto err2;
+ }
+ DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+ for (i=0; i<plen; i++) {
+ pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+ DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+ __func__, scnt, i, pl[scnt]->page_list[i]);
+ }
+
+ sgl[scnt].lkey = mr[scnt]->rkey;
+ sgl[scnt].length = cb->size;
+ sgl[scnt].addr = (u64)buf[scnt];
+ DEBUG_LOG(cb, "%s sgl[%u].lkey 0x%x length %u addr 0x%llx\n",
+ __func__, scnt, sgl[scnt].lkey, sgl[scnt].length,
+ sgl[scnt].addr);
+
+ fr[scnt].opcode = IB_WR_FAST_REG_MR;
+ fr[scnt].wr_id = scnt;
+ fr[scnt].send_flags = 0;
+ fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+ fr[scnt].wr.fast_reg.length = cb->size;
+ fr[scnt].wr.fast_reg.page_list = pl[scnt];
+ fr[scnt].wr.fast_reg.page_list_len = plen;
+ fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+ fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+ fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+ fr[scnt].next = &read[scnt];
+ read[scnt].opcode = IB_WR_RDMA_READ_WITH_INV;
+ read[scnt].wr_id = scnt;
+ read[scnt].send_flags = IB_SEND_SIGNALED;
+ read[scnt].wr.rdma.rkey = cb->remote_rkey;
+ read[scnt].wr.rdma.remote_addr = cb->remote_addr;
+ read[scnt].num_sge = 1;
+ read[scnt].sg_list = &sgl[scnt];
+ ret = ib_post_send(cb->qp, &fr[scnt], &bad);
if (ret) {
- PRINTF(cb, "post send error %d\n", ret);
- break;
+ PRINTF(cb, "ib_post_send failed %d\n", ret);
+ goto err2;
}
+ }
- /* Wait for the server to say the RDMA Write is complete. */
- wait_event_interruptible(cb->sem,
- cb->state >= RDMA_WRITE_COMPLETE);
- if (cb->state != RDMA_WRITE_COMPLETE) {
- PRINTF(cb,
- "wait for RDMA_WRITE_COMPLETE state %d\n",
- cb->state);
+ start = time_uptime;
+ DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+ while (!cb->count || cb->server || count < cb->count) {
+ if ((time_uptime - start) >= 9) {
+ DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+ count);
+ wait_event_interruptible_timeout(cb->sem,
+ cb->state == ERROR,
+ 1);
+ if (cb->state == ERROR)
+ break;
+ start = time_uptime;
+ }
+ do {
+ ret = ib_poll_cq(cb->cq, 1, &wc);
+ if (ret < 0) {
+ PRINTF(cb, "ib_poll_cq failed %d\n",
+ ret);
+ goto err2;
+ }
+ if (ret == 1) {
+ if (wc.status) {
+ PRINTF(cb,
+ "completion error %u wr_id %lld "
+ "opcode %d\n", wc.status,
+ wc.wr_id, wc.opcode);
+ goto err2;
+ }
+ count++;
+ if (count == cb->count)
+ break;
+ ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+ fr[wc.wr_id].wr.fast_reg.rkey =
+ mr[wc.wr_id]->rkey;
+ sgl[wc.wr_id].lkey = mr[wc.wr_id]->rkey;
+ ret = ib_post_send(cb->qp, &fr[wc.wr_id], &bad);
+ if (ret) {
+ PRINTF(cb,
+ "ib_post_send failed %d\n", ret);
+ goto err2;
+ }
+ } else if (krping_sigpending()) {
+ PRINTF(cb, "signal!\n");
+ goto err2;
+ }
+ } while (ret == 1);
+ }
+ DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+ DEBUG_LOG(cb, "sleeping 1 second\n");
+ wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+ DEBUG_LOG(cb, "draining the cq...\n");
+ do {
+ ret = ib_poll_cq(cb->cq, 1, &wc);
+ if (ret < 0) {
+ PRINTF(cb, "ib_poll_cq failed %d\n", ret);
break;
}
-
- if (cb->validate)
- if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
- PRINTF(cb, "data mismatch!\n");
- break;
+ if (ret == 1) {
+ if (wc.status) {
+ PRINTF(cb, "completion error %u "
+ "opcode %u\n", wc.status, wc.opcode);
}
+ }
+ } while (ret == 1);
- if (cb->verbose) {
- if (strlen(cb->rdma_buf) > 128) {
- char msgbuf[128];
-
- strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
- PRINTF(cb, "ping data stripped: %s\n",
- msgbuf);
- } else
- PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+ DEBUG_LOG(cb, "destroying fr mrs!\n");
+ for (scnt = 0; scnt < depth; scnt++) {
+ if (mr[scnt]) {
+ ib_dereg_mr(mr[scnt]);
+ DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+ }
+ }
+ DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+ for (scnt = 0; scnt < depth; scnt++) {
+ if (buf[scnt]) {
+ dma_unmap_single(cb->pd->device->dma_device,
+ dma_addr[scnt], cb->size,
+ DMA_BIDIRECTIONAL);
+ kfree(buf[scnt]);
+ DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+ }
+ }
+ DEBUG_LOG(cb, "destroying fr page lists!\n");
+ for (scnt = 0; scnt < depth; scnt++) {
+ if (pl[scnt]) {
+ DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+ ib_free_fast_reg_page_list(pl[scnt]);
}
-#ifdef SLOW_KRPING
- wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
}
+err1:
+ if (pl)
+ kfree(pl);
+ if (mr)
+ kfree(mr);
+ if (fr)
+ kfree(fr);
+ if (read)
+ kfree(read);
+ if (sgl)
+ kfree(sgl);
+ if (buf)
+ kfree(buf);
+ if (dma_addr)
+ kfree(dma_addr);
+}
+static void krping_fr_test_server(struct krping_cb *cb)
+{
+ DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static void krping_rlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_server(struct krping_cb *cb)
{
struct ib_send_wr *bad_wr;
struct ib_wc wc;
int ret;
- cb->state = RDMA_READ_ADV;
+ /* Spin waiting for client's Start STAG/TO/Len */
+ while (cb->state < RDMA_READ_ADV) {
+ krping_cq_event_handler(cb->cq, cb);
+ }
+ DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+ cb->remote_rkey, cb->remote_addr);
/* Send STAG/TO/Len to client */
krping_format_send(cb, cb->start_dma_addr);
- if (cb->state == ERROR) {
- PRINTF(cb, "krping_format_send failed\n");
- return;
- }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
PRINTF(cb, "post send error %d\n", ret);
@@ -1775,84 +1895,31 @@
return;
}
if (wc.status) {
- PRINTF(cb, "send completion error %d\n", wc.status);
+ PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
- /* Spin waiting for server's Start STAG/TO/Len */
- while (cb->state < RDMA_WRITE_ADV) {
- krping_cq_event_handler(cb->cq, cb);
- }
-
-#if 0
-{
- int i;
- struct timeval start, stop;
- time_t sec;
- suseconds_t usec;
- unsigned long long elapsed;
- struct ib_wc wc;
- struct ib_send_wr *bad_wr;
- int ne;
-
- cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
- cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
- cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
- cb->rdma_sq_wr.sg_list->length = 0;
- cb->rdma_sq_wr.num_sge = 0;
-
- microtime(&start);
- for (i=0; i < 100000; i++) {
- if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
- PRINTF(cb, "Couldn't post send\n");
- return;
- }
- do {
- ne = ib_poll_cq(cb->cq, 1, &wc);
- } while (ne == 0);
- if (ne < 0) {
- PRINTF(cb, "poll CQ failed %d\n", ne);
- return;
- }
- if (wc.status != IB_WC_SUCCESS) {
- PRINTF(cb, "Completion wth error at %s:\n",
- cb->server ? "server" : "client");
- PRINTF(cb, "Failed status %d: wr_id %d\n",
- wc.status, (int) wc.wr_id);
- return;
- }
- }
- microtime(&stop);
-
- if (stop.tv_usec < start.tv_usec) {
- stop.tv_usec += 1000000;
- stop.tv_sec -= 1;
- }
- sec = stop.tv_sec - start.tv_sec;
- usec = stop.tv_usec - start.tv_usec;
- elapsed = sec * 1000000 + usec;
- PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
-}
-#endif
-
- rlat_test(cb);
+ if (cb->duplex)
+ krping_fr_test5(cb);
+ DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static void krping_wlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_client(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+ struct ib_send_wr *bad;
struct ib_wc wc;
int ret;
cb->state = RDMA_READ_ADV;
- /* Send STAG/TO/Len to client */
+ /* Send STAG/TO/Len to server */
krping_format_send(cb, cb->start_dma_addr);
if (cb->state == ERROR) {
PRINTF(cb, "krping_format_send failed\n");
return;
}
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+ ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
if (ret) {
PRINTF(cb, "post send error %d\n", ret);
return;
@@ -1873,15 +1940,619 @@
while (cb->state < RDMA_WRITE_ADV) {
krping_cq_event_handler(cb->cq, cb);
}
+ DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
- wlat_test(cb);
+ return krping_fr_test5(cb);
}
-static void krping_bw_test_client(struct krping_cb *cb)
+/*
+ * krping_fr_test6 - write + fastreg + local-invalidate stress test.
+ *
+ * Posts sq-depth worth of write + fastreg + inv chains (txdepth / 3 chains
+ * of three work requests each), reposting each chain as its inv completes.
+ * Only the inv at the tail of a chain is signaled, so one completion means
+ * the whole chain has been processed.
+ * NOTE: every 9 seconds we pause for one tick to keep the kernel happy.
+ * If a count is given, then the last IO will have a bogus lkey in the
+ * write work request.  This reproduces a fw bug where the connection
+ * will get stuck if a fastreg is processed while the ulptx is failing
+ * the bad write.
+ *
+ * cb: control block of an established connection; the peer's
+ *     remote_rkey/remote_addr must already have been received.
+ *
+ * Returns nothing.  Failures are reported with PRINTF and every resource
+ * allocated here is released before returning.
+ */
+static void krping_fr_test6(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+	struct ib_fast_reg_page_list **pl;
+	struct ib_send_wr *fr, *write, *inv, *bad;
struct ib_wc wc;
- int ret;
+	struct ib_sge *sgl;
+	u8 key = 0;
+	struct ib_mr **mr;
+	u8 **buf;
+	dma_addr_t *dma_addr;
+	int i;
+	int ret;
+	int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	unsigned long start;
+	int count = 0;
+	int scnt;
+	int depth = cb->txdepth / 3;	/* three WRs per chain */
+
+	if (!depth) {
+		PRINTF(cb, "txdepth must be > 3 for this test!\n");
+		return;
+	}
+
+	pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+
+	mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+
+	fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+
+	sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+
+	write = kzalloc(sizeof *write * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s write %p size %lu\n", __func__, write, sizeof *write * depth);
+
+	inv = kzalloc(sizeof *inv * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s inv %p size %lu\n", __func__, inv, sizeof *inv * depth);
+
+	buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+
+	dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+
+	/* inv is dereferenced in the setup loop, so it must be checked too. */
+	if (!pl || !mr || !fr || !write || !inv || !sgl || !buf || !dma_addr) {
+		PRINTF(cb, "kzalloc failed\n");
+		goto err1;
+	}
+
+	for (scnt = 0; scnt < depth; scnt++) {
+		pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+		if (IS_ERR(pl[scnt])) {
+			PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+			       PTR_ERR(pl[scnt]));
+			/* The cleanup loops only test for NULL; hide the ERR_PTR. */
+			pl[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
+
+		mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+		if (IS_ERR(mr[scnt])) {
+			PRINTF(cb, "alloc_fr failed %ld\n",
+			       PTR_ERR(mr[scnt]));
+			/* Same as above: never hand an ERR_PTR to ib_dereg_mr(). */
+			mr[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+		ib_update_fast_reg_key(mr[scnt], ++key);
+
+		buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+		if (!buf[scnt]) {
+			PRINTF(cb, "kmalloc failed\n");
+			ret = -ENOMEM;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+		dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+						buf[scnt], cb->size,
+						DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(cb->pd->device->dma_device,
+		    dma_addr[scnt])) {
+			PRINTF(cb, "dma_map failed\n");
+			ret = -ENOMEM;
+			/*
+			 * The mapping never existed, so release the buffer
+			 * here and keep the cleanup loop from unmapping it.
+			 */
+			kfree(buf[scnt]);
+			buf[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+		for (i=0; i<plen; i++) {
+			pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+			DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+				  __func__, scnt, i, pl[scnt]->page_list[i]);
+		}
+
+		/* Chain: write -> fastreg -> local invalidate (signaled). */
+		write[scnt].opcode = IB_WR_RDMA_WRITE;
+		write[scnt].wr_id = scnt;
+		write[scnt].wr.rdma.rkey = cb->remote_rkey;
+		write[scnt].wr.rdma.remote_addr = cb->remote_addr;
+		write[scnt].num_sge = 1;
+		write[scnt].sg_list = &cb->rdma_sgl;
+		write[scnt].sg_list->length = cb->size;
+		write[scnt].next = &fr[scnt];
+
+		fr[scnt].opcode = IB_WR_FAST_REG_MR;
+		fr[scnt].wr_id = scnt;
+		fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+		fr[scnt].wr.fast_reg.length = cb->size;
+		fr[scnt].wr.fast_reg.page_list = pl[scnt];
+		fr[scnt].wr.fast_reg.page_list_len = plen;
+		fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+		fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+		fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+		fr[scnt].next = &inv[scnt];
+
+		inv[scnt].opcode = IB_WR_LOCAL_INV;
+		/*
+		 * The inv is the only signaled WR of the chain and the poll
+		 * loop below indexes the per-chain arrays with wc.wr_id, so
+		 * it has to carry the chain index.
+		 */
+		inv[scnt].wr_id = scnt;
+		inv[scnt].send_flags = IB_SEND_SIGNALED;
+		inv[scnt].ex.invalidate_rkey = mr[scnt]->rkey;
+
+		ret = ib_post_send(cb->qp, &write[scnt], &bad);
+		if (ret) {
+			PRINTF(cb, "ib_post_send failed %d\n", ret);
+			goto err2;
+		}
+	}
+
+	start = time_uptime;
+	DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+	while (!cb->count || cb->server || count < cb->count) {
+		/* Yield for one tick after every 9 seconds of polling. */
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+				  count);
+			wait_event_interruptible_timeout(cb->sem,
+							 cb->state == ERROR,
+							 1);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n",
+				       ret);
+				goto err2;
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb,
+					       "completion error %u wr_id %lld "
+					       "opcode %d\n", wc.status,
+					       wc.wr_id, wc.opcode);
+					goto err2;
+				}
+				count++;
+				/* Poison the lkey so the final write fails. */
+				if (count == (cb->count -1))
+					cb->rdma_sgl.lkey = 0x00dead;
+				if (count == cb->count)
+					break;
+				/* Re-key the MR and repost the whole chain. */
+				ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+				fr[wc.wr_id].wr.fast_reg.rkey =
+					mr[wc.wr_id]->rkey;
+				inv[wc.wr_id].ex.invalidate_rkey =
+					mr[wc.wr_id]->rkey;
+				ret = ib_post_send(cb->qp, &write[wc.wr_id], &bad);
+				if (ret) {
+					PRINTF(cb,
+					       "ib_post_send failed %d\n", ret);
+					goto err2;
+				}
+			} else if (krping_sigpending()){
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+	/* Give outstanding work a second to finish, then drain the CQ. */
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u "
+				       "opcode %u\n", wc.status, wc.opcode);
+			}
+		}
+	} while (ret == 1);
+
+	DEBUG_LOG(cb, "destroying fr mrs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (mr[scnt]) {
+			ib_dereg_mr(mr[scnt]);
+			DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (buf[scnt]) {
+			dma_unmap_single(cb->pd->device->dma_device,
+					 dma_addr[scnt], cb->size,
+					 DMA_BIDIRECTIONAL);
+			kfree(buf[scnt]);
+			DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "destroying fr page lists!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (pl[scnt]) {
+			DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+			ib_free_fast_reg_page_list(pl[scnt]);
+		}
+	}
+err1:
+	if (pl)
+		kfree(pl);
+	if (mr)
+		kfree(mr);
+	if (fr)
+		kfree(fr);
+	if (write)
+		kfree(write);
+	if (inv)
+		kfree(inv);
+	if (sgl)
+		kfree(sgl);
+	if (buf)
+		kfree(buf);
+	if (dma_addr)
+		kfree(dma_addr);
+}
+
+/*
+ * Server side of fastreg test 6.
+ *
+ * Waits for the client's STAG/TO/Len advertisement, sends the server's own
+ * advertisement back, and then either runs krping_fr_test6() itself (duplex
+ * mode) or simply waits until the connection enters the ERROR state.
+ *
+ * cb: control block of the accepted connection.
+ */
+static void krping_fr_test6_server(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	/* Spin waiting for client's Start STAG/TO/Len */
+	while (cb->state < RDMA_READ_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+		  cb->remote_rkey, cb->remote_addr);
+
+	/* Send STAG/TO/Len to client */
+	krping_format_send(cb, cb->start_dma_addr);
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment must be
+	 * parenthesized: otherwise ret receives the result of the comparison
+	 * and a negative return from ib_poll_cq() is never noticed.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	if (cb->duplex)
+		krping_fr_test6(cb);
+	DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+	wait_event_interruptible(cb->sem, cb->state == ERROR);
+}
+
+/*
+ * Client side of fastreg test 6.
+ *
+ * Advertises the client's STAG/TO/Len to the server, waits for that send to
+ * complete and for the server's advertisement to arrive, then runs
+ * krping_fr_test6().
+ *
+ * cb: control block of the connected client.
+ */
+static void krping_fr_test6_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment must be
+	 * parenthesized: otherwise ret receives the result of the comparison
+	 * and a negative return from ib_poll_cq() is never noticed.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
+
+	/* Plain call: a void function may not return an expression. */
+	krping_fr_test6(cb);
+}
+
+/*
+ * Server entry point.
+ *
+ * Calls krping_bind_server(), sets up the QP and buffers on the child CM id,
+ * posts the initial receive, accepts the connection and runs the test
+ * selected by the wlat/rlat/bw/frtest flags (the plain ping server when none
+ * is set).  Resources are released in reverse order of acquisition.
+ */
+static void krping_run_server(struct krping_cb *cb)
+{
+	struct ib_recv_wr *bad_recv;
+	int rc;
+
+	if (krping_bind_server(cb))
+		return;
+
+	rc = krping_setup_qp(cb, cb->child_cm_id);
+	if (rc) {
+		PRINTF(cb, "setup_qp failed: %d\n", rc);
+		goto out_destroy_id;
+	}
+
+	rc = krping_setup_buffers(cb);
+	if (rc) {
+		PRINTF(cb, "krping_setup_buffers failed: %d\n", rc);
+		goto out_free_qp;
+	}
+
+	rc = ib_post_recv(cb->qp, &cb->rq_wr, &bad_recv);
+	if (rc) {
+		PRINTF(cb, "ib_post_recv failed: %d\n", rc);
+		goto out_free_buffers;
+	}
+
+	rc = krping_accept(cb);
+	if (rc) {
+		PRINTF(cb, "connect error %d\n", rc);
+		goto out_free_buffers;
+	}
+
+	if (cb->wlat) {
+		krping_wlat_test_server(cb);
+	} else if (cb->rlat) {
+		krping_rlat_test_server(cb);
+	} else if (cb->bw) {
+		krping_bw_test_server(cb);
+	} else if (cb->frtest) {
+		/* Tests 1-4 share one server routine; 5 and 6 have their own. */
+		if (cb->testnum >= 1 && cb->testnum <= 4) {
+			krping_fr_test_server(cb);
+		} else if (cb->testnum == 5) {
+			krping_fr_test5_server(cb);
+		} else if (cb->testnum == 6) {
+			krping_fr_test6_server(cb);
+		} else {
+			/* Unknown test: skip the disconnect, just tear down. */
+			PRINTF(cb, "unknown fr test %d\n", cb->testnum);
+			goto out_free_buffers;
+		}
+	} else {
+		krping_test_server(cb);
+	}
+	rdma_disconnect(cb->child_cm_id);
+out_free_buffers:
+	krping_free_buffers(cb);
+out_free_qp:
+	krping_free_qp(cb);
+out_destroy_id:
+	rdma_destroy_id(cb->child_cm_id);
+}
+
+/*
+ * Default (non-latency, non-bandwidth) ping client.
+ *
+ * Each iteration fills start_buf with a "rdma-ping-N: " header followed by a
+ * rolling character pattern, advertises start_buf and then rdma_buf to the
+ * server, waits for the matching state transitions, and optionally
+ * validates and prints the data found in rdma_buf.  Runs cb->count times,
+ * or forever when cb->count is zero; any failure ends the loop.
+ */
+static void krping_test_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_send;
+	unsigned char ch;
+	int first_ch = 65;	/* pattern cycles through 65..122 */
+	int iter, hdr_len, pos, rc;
+
+	for (iter = 0; !cb->count || iter < cb->count; iter++) {
+		cb->state = RDMA_READ_ADV;
+
+		/* Header first, then the rolling pattern, NUL-terminated. */
+		hdr_len = sprintf(cb->start_buf, "rdma-ping-%d: ", iter);
+		ch = first_ch;
+		for (pos = hdr_len; pos < cb->size; pos++) {
+			cb->start_buf[pos] = ch;
+			if (++ch > 122)
+				ch = 65;
+		}
+		/* Each ping starts the pattern one character later. */
+		if (++first_ch > 122)
+			first_ch = 65;
+		cb->start_buf[cb->size - 1] = 0;
+
+		/* Advertise the source buffer. */
+		krping_format_send(cb, cb->start_dma_addr);
+		if (cb->state == ERROR) {
+			PRINTF(cb, "krping_format_send failed\n");
+			break;
+		}
+		rc = ib_post_send(cb->qp, &cb->sq_wr, &bad_send);
+		if (rc) {
+			PRINTF(cb, "post send error %d\n", rc);
+			break;
+		}
+
+		/* Block until the server acknowledges. */
+		wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
+		if (cb->state != RDMA_WRITE_ADV) {
+			PRINTF(cb,
+			       "wait for RDMA_WRITE_ADV state %d\n",
+			       cb->state);
+			break;
+		}
+
+		/* Advertise the sink buffer. */
+		krping_format_send(cb, cb->rdma_dma_addr);
+		rc = ib_post_send(cb->qp, &cb->sq_wr, &bad_send);
+		if (rc) {
+			PRINTF(cb, "post send error %d\n", rc);
+			break;
+		}
+
+		/* Block until the server reports its RDMA write as done. */
+		wait_event_interruptible(cb->sem,
+					 cb->state >= RDMA_WRITE_COMPLETE);
+		if (cb->state != RDMA_WRITE_COMPLETE) {
+			PRINTF(cb,
+			       "wait for RDMA_WRITE_COMPLETE state %d\n",
+			       cb->state);
+			break;
+		}
+
+		if (cb->validate &&
+		    memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
+			PRINTF(cb, "data mismatch!\n");
+			break;
+		}
+
+		if (cb->verbose) {
+			if (strlen(cb->rdma_buf) <= 128) {
+				PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+			} else {
+				/* Long payloads are truncated for printing. */
+				char msgbuf[128];
+
+				strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
+				PRINTF(cb, "ping data stripped: %s\n",
+				       msgbuf);
+			}
+		}
+#ifdef SLOW_KRPING
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+#endif
+	}
+}
+
+/*
+ * Client side of the read-latency test.
+ *
+ * Advertises the client's STAG/TO/Len to the server, waits for that send to
+ * complete and for the server's advertisement to arrive, then runs
+ * rlat_test().
+ *
+ * cb: control block of the connected client.
+ */
+static void krping_rlat_test_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment must be
+	 * parenthesized: otherwise ret receives the result of the comparison
+	 * and a negative return from ib_poll_cq() is never noticed.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+
+	/* Disabled 0-byte write latency measurement, kept for reference. */
+#if 0
+{
+	int i;
+	struct timeval start, stop;
+	time_t sec;
+	suseconds_t usec;
+	unsigned long long elapsed;
+	struct ib_wc wc;
+	struct ib_send_wr *bad_wr;
+	int ne;
+
+	cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
+	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
+	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
+	cb->rdma_sq_wr.sg_list->length = 0;
+	cb->rdma_sq_wr.num_sge = 0;
+
+	microtime(&start);
+	for (i=0; i < 100000; i++) {
+		if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
+			PRINTF(cb, "Couldn't post send\n");
+			return;
+		}
+		do {
+			ne = ib_poll_cq(cb->cq, 1, &wc);
+		} while (ne == 0);
+		if (ne < 0) {
+			PRINTF(cb, "poll CQ failed %d\n", ne);
+			return;
+		}
+		if (wc.status != IB_WC_SUCCESS) {
+			PRINTF(cb, "Completion wth error at %s:\n",
+				cb->server ? "server" : "client");
+			PRINTF(cb, "Failed status %d: wr_id %d\n",
+				wc.status, (int) wc.wr_id);
+			return;
+		}
+	}
+	microtime(&stop);
+
+	if (stop.tv_usec < start.tv_usec) {
+		stop.tv_usec += 1000000;
+		stop.tv_sec -= 1;
+	}
+	sec = stop.tv_sec - start.tv_sec;
+	usec = stop.tv_usec - start.tv_usec;
+	elapsed = sec * 1000000 + usec;
+	PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
+}
+#endif
+
+	rlat_test(cb);
+}
+
+/*
+ * Client side of the write-latency test.
+ *
+ * Advertises the client's STAG/TO/Len to the server, waits for that send to
+ * complete and for the server's advertisement to arrive, then runs
+ * wlat_test().
+ *
+ * cb: control block of the connected client.
+ */
+static void krping_wlat_test_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment must be
+	 * parenthesized: otherwise ret receives the result of the comparison
+	 * and a negative return from ib_poll_cq() is never noticed.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+
+	wlat_test(cb);
+}
+
+static void krping_bw_test_client(struct krping_cb *cb)
+{
+ struct ib_send_wr *bad_wr;
+ struct ib_wc wc;
+ int ret;
cb->state = RDMA_READ_ADV;
@@ -1916,7 +2587,212 @@
bw_test(cb);
}
-static void krping_fr_test(struct krping_cb *cb)
+
+/*
+ * fastreg 2 valid different mrs and verify the completions.
+ *
+ * Allocates one page list and two fast-reg MRs, posts one signaled
+ * IB_WR_FAST_REG_MR work request per MR, then polls the CQ (sleeping up
+ * to one second between polls) until two completions have been seen or
+ * a signal is pending.  Completions are logged, not checked for status.
+ * The CQ is drained and all resources are released before returning.
+ */
+static void krping_fr_test1(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, *bad;
+	struct ib_wc wc;
+	struct ib_mr *mr1, *mr2;
+	int i;
+	int ret;
+	int size = cb->size;
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	int count = 0;
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+
+	mr1 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr1)) {
+		/* Report the error carried by mr1, not the (valid) page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr1));
+		goto err1;
+	}
+	mr2 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr2)) {
+		/* Report the error carried by mr2, not the (valid) page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr2));
+		goto err2;
+	}
+
+	/* Fill the page list with synthetic page-aligned addresses. */
+	for (i=0; i<plen; i++)
+		pl->page_list[i] = i * PAGE_SIZE;
+
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr_id = 1;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.send_flags = IB_SEND_SIGNALED;
+	fr.wr.fast_reg.rkey = mr1->rkey;
+	DEBUG_LOG(cb, "%s fr1: stag 0x%x plen %u size %u depth %u\n", __func__, fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+	/* Reuse the same work request for the second MR; only the rkey differs. */
+	fr.wr.fast_reg.rkey = mr2->rkey;
+	DEBUG_LOG(cb, "%s fr2: stag 0x%x plen %u size %u depth %u\n", __func__, fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	/* Collect one completion per posted fastreg (two in total). */
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			goto err3;
+		}
+		if (ret == 1) {
+			DEBUG_LOG(cb, "completion status %u wr %s\n",
+				  wc.status, wc.wr_id == 1 ? "fr" : "inv");
+			count++;
+		} else if (krping_sigpending()) {
+			PRINTF(cb, "signal!\n");
+			goto err3;
+		}
+
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	} while (count != 2);
+err3:
+	/* Reached on success as well: drain any leftover completions. */
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			PRINTF(cb, "completion %u opcode %u\n", wc.status, wc.opcode);
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "destroying fr mr2!\n");
+
+	ib_dereg_mr(mr2);
+err2:
+	DEBUG_LOG(cb, "destroying fr mr1!\n");
+	ib_dereg_mr(mr1);
+err1:
+	DEBUG_LOG(cb, "destroying fr page list!\n");
+	ib_free_fast_reg_page_list(pl);
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * fastreg the same mr twice, 2nd one should produce error cqe.
+ *
+ * Allocates one page list and one fast-reg MR, posts the identical
+ * signaled IB_WR_FAST_REG_MR work request twice, then polls the CQ
+ * (sleeping up to one second between polls) until two completions have
+ * been seen or a signal is pending.  Completions are logged only.  The
+ * CQ is drained and all resources are released before returning.
+ */
+static void krping_fr_test2(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, *bad;
+	struct ib_wc wc;
+	struct ib_mr *mr1;
+	int i;
+	int ret;
+	int size = cb->size;
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	int count = 0;
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+
+	mr1 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr1)) {
+		/* Report the error carried by mr1, not the (valid) page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr1));
+		goto err1;
+	}
+
+	/* Fill the page list with synthetic page-aligned addresses. */
+	for (i=0; i<plen; i++)
+		pl->page_list[i] = i * PAGE_SIZE;
+
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr_id = 1;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.send_flags = IB_SEND_SIGNALED;
+	fr.wr.fast_reg.rkey = mr1->rkey;
+	DEBUG_LOG(cb, "%s fr1: stag 0x%x plen %u size %u depth %u\n", __func__, fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+	/* Deliberately post the very same fastreg (same rkey) a second time. */
+	DEBUG_LOG(cb, "%s fr2: stag 0x%x plen %u size %u depth %u\n", __func__, fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	/* Collect one completion per posted fastreg (two in total). */
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			goto err3;
+		}
+		if (ret == 1) {
+			DEBUG_LOG(cb, "completion status %u wr %s\n",
+				  wc.status, wc.wr_id == 1 ? "fr" : "inv");
+			count++;
+		} else if (krping_sigpending()) {
+			PRINTF(cb, "signal!\n");
+			goto err3;
+		}
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	} while (count != 2);
+err3:
+	/* Reached on success as well: drain any leftover completions. */
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			PRINTF(cb, "completion %u opcode %u\n", wc.status, wc.opcode);
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "destroying fr mr1!\n");
+	ib_dereg_mr(mr1);
+err1:
+	DEBUG_LOG(cb, "destroying fr page list!\n");
+	ib_free_fast_reg_page_list(pl);
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * fastreg pipelined in a loop as fast as we can until the user interrupts.
+ * NOTE: every 9 seconds we sleep for 1 second to keep the kernel happy.
+ */
+static void krping_fr_test3(struct krping_cb *cb)
{
struct ib_fast_reg_page_list *pl;
struct ib_send_wr fr, inv, *bad;
@@ -1927,10 +2803,11 @@
int ret;
int size = cb->size;
int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
- time_t start;
+ unsigned long start;
int count = 0;
int scnt = 0;
+
pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
if (IS_ERR(pl)) {
PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
@@ -1944,7 +2821,7 @@
}
for (i=0; i<plen; i++)
- pl->page_list[i] = 0xcafebabe | i;
+ pl->page_list[i] = i * PAGE_SIZE;
memset(&fr, 0, sizeof fr);
fr.opcode = IB_WR_FAST_REG_MR;
@@ -1953,6 +2830,7 @@
fr.wr.fast_reg.page_list = pl;
fr.wr.fast_reg.page_list_len = plen;
fr.wr.fast_reg.iova_start = 0;
+ fr.send_flags = IB_SEND_SIGNALED;
fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
fr.next = &inv;
memset(&inv, 0, sizeof inv);
@@ -1964,7 +2842,7 @@
while (1) {
if ((time_uptime - start) >= 9) {
DEBUG_LOG(cb, "fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen);
- wait_event_interruptible(cb->sem, cb->state == ERROR);
+ wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
if (cb->state == ERROR)
break;
start = time_uptime;
@@ -1984,7 +2862,7 @@
PRINTF(cb, "ib_post_send failed %d\n", ret);
goto err2;
}
- scnt++;
+ scnt+=2;
}
do {
@@ -2008,10 +2886,8 @@
} while (ret == 1);
}
err2:
-#if 0
DEBUG_LOG(cb, "sleeping 1 second\n");
wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
DEBUG_LOG(cb, "draining the cq...\n");
do {
ret = ib_poll_cq(cb->cq, 1, &wc);
@@ -2028,7 +2904,128 @@
DEBUG_LOG(cb, "fr_test: done!\n");
ib_dereg_mr(mr);
err1:
+ DEBUG_LOG(cb, "destroying fr page list!\n");
+ ib_free_fast_reg_page_list(pl);
+ DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * fastreg 1 and invalidate 1 mr and verify completion.
+ *
+ * Allocates one page list and one fast-reg MR, then posts a signaled
+ * IB_WR_FAST_REG_MR chained (via fr.next) to an IB_WR_LOCAL_INV for the
+ * same rkey.  The invalidate leaves send_flags zero, so presumably only
+ * the fastreg generates a completion; the loop waits for exactly one.
+ * The CQ is drained and all resources are released before returning.
+ */
+static void krping_fr_test4(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, inv, *bad;
+	struct ib_wc wc;
+	struct ib_mr *mr1;
+	int i;
+	int ret;
+	int size = cb->size;
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	int count = 0;
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+
+	mr1 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr1)) {
+		/* Report the error carried by mr1, not the (valid) page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr1));
+		goto err1;
+	}
+
+	/* Fill the page list with synthetic page-aligned addresses. */
+	for (i=0; i<plen; i++)
+		pl->page_list[i] = i * PAGE_SIZE;
+
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr_id = 1;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.send_flags = IB_SEND_SIGNALED;
+	fr.wr.fast_reg.rkey = mr1->rkey;
+	/* Chain the local invalidate behind the fastreg in a single post. */
+	fr.next = &inv;
+	memset(&inv, 0, sizeof inv);
+	inv.opcode = IB_WR_LOCAL_INV;
+	inv.ex.invalidate_rkey = mr1->rkey;
+
+	DEBUG_LOG(cb, "%s fr1: stag 0x%x plen %u size %u depth %u\n", __func__, fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			goto err3;
+		}
+		if (ret == 1) {
+			DEBUG_LOG(cb, "completion status %u wr %s\n",
+				  wc.status, wc.wr_id == 1 ? "fr" : "inv");
+			count++;
+		} else if (krping_sigpending()) {
+			PRINTF(cb, "signal!\n");
+			goto err3;
+		}
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	} while (count != 1);
+err3:
+	/* Reached on success as well: drain any leftover completions. */
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			PRINTF(cb, "completion %u opcode %u\n", wc.status, wc.opcode);
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "destroying fr mr1!\n");
+	ib_dereg_mr(mr1);
+err1:
+	DEBUG_LOG(cb, "destroying fr page list!\n");
 	ib_free_fast_reg_page_list(pl);
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * Dispatch to the fast-register test selected by cb->testnum (1-6).
+ * Any other value is reported via PRINTF and no test is run.
+ */
+static void krping_fr_test(struct krping_cb *cb)
+{
+	switch (cb->testnum) {
+	case 1:
+		krping_fr_test1(cb);
+		break;
+	case 2:
+		krping_fr_test2(cb);
+		break;
+	case 3:
+		krping_fr_test3(cb);
+		break;
+	case 4:
+		krping_fr_test4(cb);
+		break;
+	case 5:
+		krping_fr_test5_client(cb);
+		break;
+	case 6:
+		krping_fr_test6_client(cb);
+		break;
+	default:
+		/* testnum is declared int, so it is printed with %d. */
+		PRINTF(cb, "Unknown frtest num %d\n", cb->testnum);
+		break;
+	}
 }
static int krping_connect_client(struct krping_cb *cb)
@@ -2083,7 +3080,7 @@
return -EINTR;
}
- if (cb->mem == FASTREG && !fastreg_supported(cb))
+ if (cb->mem == FASTREG && !fastreg_supported(cb, 0))
return -EINVAL;
DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n");
@@ -2266,6 +3263,7 @@
break;
case 'f':
cb->frtest = 1;
+ cb->testnum = optint;
DEBUG_LOG(cb, "fast-reg test!\n");
break;
default:
@@ -2283,18 +3281,11 @@
goto out;
}
- if (cb->server && cb->frtest) {
- PRINTF(cb, "must be client to run frtest\n");
- ret = -EINVAL;
- goto out;
- }
-
if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) {
PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n");
ret = -EINVAL;
goto out;
}
-
if (cb->server_invalidate && cb->mem != FASTREG) {
PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n");
ret = -EINVAL;
@@ -2307,7 +3298,7 @@
goto out;
}
- if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) {
+ if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw || cb->frtest)) {
PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n");
ret = -EINVAL;
goto out;

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 10, 11:19 AM (1 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31223452
Default Alt Text
D5777.id.diff (46 KB)

Event Timeline