Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F151605899
D5777.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
46 KB
Referenced Files
None
Subscribers
None
D5777.id.diff
View Options
Index: head/sys/contrib/rdma/krping/krping.c
===================================================================
--- head/sys/contrib/rdma/krping/krping.c
+++ head/sys/contrib/rdma/krping/krping.c
@@ -56,6 +56,7 @@
extern int krping_debug;
#define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x)
#define PRINTF(cb, x...) krping_printf((cb)->cookie, x)
+#define BIND_INFO 1
MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("RDMA ping client/server");
@@ -99,7 +100,7 @@
{"poll", OPT_NOPARAM, 'P'},
{"local_dma_lkey", OPT_NOPARAM, 'Z'},
{"read_inv", OPT_NOPARAM, 'R'},
- {"fr", OPT_NOPARAM, 'f'},
+ {"fr", OPT_INT, 'f'},
{NULL, 0, 0}
};
@@ -232,6 +233,7 @@
int txdepth; /* SQ depth */
int local_dma_lkey; /* use 0 for lkey */
int frtest; /* fastreg test */
+ int testnum;
/* CM stuff */
struct rdma_cm_id *cm_id; /* connection on client side,*/
@@ -365,11 +367,7 @@
PRINTF(cb, "cq completion in ERROR state\n");
return;
}
- if (cb->frtest) {
- PRINTF(cb, "cq completion event in frtest!\n");
- return;
- }
- if (!cb->wlat && !cb->rlat && !cb->bw)
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest)
ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
if (wc.status) {
@@ -411,7 +409,7 @@
DEBUG_LOG(cb, "recv completion\n");
cb->stats.recv_bytes += sizeof(cb->recv_buf);
cb->stats.recv_msgs++;
- if (cb->wlat || cb->rlat || cb->bw)
+ if (cb->wlat || cb->rlat || cb->bw || cb->frtest)
ret = server_recv(cb, &wc);
else
ret = cb->server ? server_recv(cb, &wc) :
@@ -464,7 +462,7 @@
return ret;
}
- if (!cb->wlat && !cb->rlat && !cb->bw) {
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
if (cb->state == ERROR) {
PRINTF(cb, "wait for CONNECTED state %d\n",
@@ -502,7 +500,7 @@
cb->sq_wr.sg_list = &cb->send_sgl;
cb->sq_wr.num_sge = 1;
- if (cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
cb->rdma_sgl.addr = cb->rdma_dma_addr;
if (cb->mem == MR)
cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
@@ -531,7 +529,11 @@
case MW:
cb->bind_attr.wr_id = 0xabbaabba;
cb->bind_attr.send_flags = 0; /* unsignaled */
+#ifdef BIND_INFO
cb->bind_attr.bind_info.length = cb->size;
+#else
+ cb->bind_attr.length = cb->size;
+#endif
break;
default:
break;
@@ -646,7 +648,7 @@
buf.size = cb->size;
iovbase = cb->rdma_dma_addr;
cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
- IB_ACCESS_LOCAL_WRITE|
+ IB_ACCESS_LOCAL_WRITE|
IB_ACCESS_REMOTE_READ|
IB_ACCESS_REMOTE_WRITE,
&iovbase);
@@ -665,7 +667,7 @@
}
}
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
cb->start_buf = kmalloc(cb->size, GFP_KERNEL);
if (!cb->start_buf) {
@@ -682,9 +684,9 @@
if (cb->mem == MR || cb->mem == MW) {
unsigned flags = IB_ACCESS_REMOTE_READ;
- if (cb->wlat || cb->rlat || cb->bw) {
+ if (cb->wlat || cb->rlat || cb->bw || cb->frtest) {
flags |= IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE;
+ IB_ACCESS_REMOTE_WRITE;
}
buf.addr = cb->start_dma_addr;
@@ -907,15 +909,33 @@
* Update the MW with new buf info.
*/
if (buf == (u64)cb->start_dma_addr) {
+#ifdef BIND_INFO
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
cb->bind_attr.bind_info.mr = cb->start_mr;
+#else
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
+ cb->bind_attr.mr = cb->start_mr;
+#endif
} else {
+#ifdef BIND_INFO
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
cb->bind_attr.bind_info.mr = cb->rdma_mr;
+#else
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+ cb->bind_attr.mr = cb->rdma_mr;
+#endif
}
+#ifdef BIND_INFO
cb->bind_attr.bind_info.addr = buf;
+#else
+ cb->bind_attr.addr = buf;
+#endif
DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
+#ifdef BIND_INFO
cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
+#else
+ cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+#endif
ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
if (ret) {
PRINTF(cb, "bind mw error %d\n", ret);
@@ -950,7 +970,7 @@
* advertising the rdma buffer. Server side
* sends have no data.
*/
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw || cb->frtest) {
rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
info->buf = htonll(buf);
info->rkey = htonl(rkey);
@@ -980,7 +1000,6 @@
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
cb->rdma_sq_wr.sg_list->length = cb->remote_len;
cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
- cb->rdma_sq_wr.next = NULL;
/* Issue RDMA Read. */
if (cb->read_inv)
@@ -1484,7 +1503,6 @@
PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
-
wait_event_interruptible(cb->sem, cb->state == ERROR);
}
@@ -1557,9 +1575,10 @@
wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static int fastreg_supported(struct krping_cb *cb)
+static int fastreg_supported(struct krping_cb *cb, int server)
{
- struct ib_device *dev = cb->child_cm_id->device;
+ struct ib_device *dev = server?cb->child_cm_id->device:
+ cb->cm_id->device;
struct ib_device_attr attr;
int ret;
@@ -1610,158 +1629,259 @@
return -1;
}
- if (cb->mem == FASTREG && !fastreg_supported(cb))
+ if (cb->mem == FASTREG && !fastreg_supported(cb, 1))
return -EINVAL;
return 0;
}
-static void krping_run_server(struct krping_cb *cb)
+/*
+ * Fastreg test 5: post (txdepth / 2) chains, each a FAST_REG_MR work request
+ * linked to a signaled RDMA READ_WITH_INV, and repost a chain every time its
+ * read completes.  The loop runs while cb->count is 0, cb->server is set, or
+ * fewer than cb->count completions have been polled; it stops early on a
+ * poll/post/completion error, a pending signal, or cb->state == ERROR.
+ * NOTE: every 9 seconds we sleep for 1 tick to keep the kernel happy.
+ */
+static void krping_fr_test5(struct krping_cb *cb)
{
- struct ib_recv_wr *bad_wr;
+	struct ib_fast_reg_page_list **pl;
+	struct ib_send_wr *fr, *read, *bad;
+	struct ib_wc wc;
+	struct ib_sge *sgl;
+	u8 key = 0;
+	struct ib_mr **mr;
+	u8 **buf;
+	dma_addr_t *dma_addr;
+	int i;
int ret;
+	int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	time_t start;
+	int count = 0;
+	int scnt;
+	int depth = cb->txdepth >> 1;	/* two WRs (fastreg + read) per chain */
- ret = krping_bind_server(cb);
- if (ret)
+	if (!depth) {
+		PRINTF(cb, "txdepth must be > 1 for this test!\n");
return;
-
- ret = krping_setup_qp(cb, cb->child_cm_id);
- if (ret) {
- PRINTF(cb, "setup_qp failed: %d\n", ret);
- goto err0;
}
- ret = krping_setup_buffers(cb);
- if (ret) {
- PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
+	/* Per-chain bookkeeping arrays; zeroed so cleanup can test for NULL. */
+	pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+	mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+	fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+	sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+	read = kzalloc(sizeof *read * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s read %p size %lu\n", __func__, read, sizeof *read * depth);
+	buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+	dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+	if (!pl || !mr || !fr || !read || !sgl || !buf || !dma_addr) {
+		PRINTF(cb, "kzalloc failed\n");
goto err1;
}
- ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
- if (ret) {
- PRINTF(cb, "ib_post_recv failed: %d\n", ret);
- goto err2;
- }
-
- ret = krping_accept(cb);
- if (ret) {
- PRINTF(cb, "connect error %d\n", ret);
- goto err2;
- }
-
- if (cb->wlat)
- krping_wlat_test_server(cb);
- else if (cb->rlat)
- krping_rlat_test_server(cb);
- else if (cb->bw)
- krping_bw_test_server(cb);
- else
- krping_test_server(cb);
- rdma_disconnect(cb->child_cm_id);
-err2:
- krping_free_buffers(cb);
-err1:
- krping_free_qp(cb);
-err0:
- rdma_destroy_id(cb->child_cm_id);
-}
-
-static void krping_test_client(struct krping_cb *cb)
-{
- int ping, start, cc, i, ret;
- struct ib_send_wr *bad_wr;
- unsigned char c;
-
- start = 65;
- for (ping = 0; !cb->count || ping < cb->count; ping++) {
- cb->state = RDMA_READ_ADV;
-
- /* Put some ascii text in the buffer. */
- cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping);
- for (i = cc, c = start; i < cb->size; i++) {
- cb->start_buf[i] = c;
- c++;
- if (c > 122)
- c = 65;
+	/* Build and post one fastreg + read chain per slot. */
+	for (scnt = 0; scnt < depth; scnt++) {
+		pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+		if (IS_ERR(pl[scnt])) {
+			PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+			       PTR_ERR(pl[scnt]));
+			/* Cleanup tests for non-NULL; never hand it an ERR_PTR. */
+			pl[scnt] = NULL;
+			goto err2;
}
- start++;
- if (start > 122)
- start = 65;
- cb->start_buf[cb->size - 1] = 0;
+		DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
- krping_format_send(cb, cb->start_dma_addr);
- if (cb->state == ERROR) {
- PRINTF(cb, "krping_format_send failed\n");
- break;
- }
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
- if (ret) {
- PRINTF(cb, "post send error %d\n", ret);
- break;
+		mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+		if (IS_ERR(mr[scnt])) {
+			PRINTF(cb, "alloc_fr failed %ld\n",
+			       PTR_ERR(mr[scnt]));
+			/* Cleanup tests for non-NULL; never hand it an ERR_PTR. */
+			mr[scnt] = NULL;
+			goto err2;
}
+		DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+		ib_update_fast_reg_key(mr[scnt], ++key);
- /* Wait for server to ACK */
- wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
- if (cb->state != RDMA_WRITE_ADV) {
- PRINTF(cb,
- "wait for RDMA_WRITE_ADV state %d\n",
- cb->state);
- break;
+		buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+		if (!buf[scnt]) {
+			PRINTF(cb, "kmalloc failed\n");
+			ret = -ENOMEM;
+			goto err2;
}
-
- krping_format_send(cb, cb->rdma_dma_addr);
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+		DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+		dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+						buf[scnt], cb->size,
+						DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(cb->pd->device->dma_device,
+		    dma_addr[scnt])) {
+			PRINTF(cb, "dma_map failed\n");
+			ret = -ENOMEM;
+			/*
+			 * Release the unmapped buffer here so the cleanup loop
+			 * does not dma_unmap_single() an invalid handle.
+			 */
+			kfree(buf[scnt]);
+			buf[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+		for (i=0; i<plen; i++) {
+			pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+			DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+				  __func__, scnt, i, pl[scnt]->page_list[i]);
+		}
+
+		sgl[scnt].lkey = mr[scnt]->rkey;
+		sgl[scnt].length = cb->size;
+		sgl[scnt].addr = (u64)buf[scnt];
+		DEBUG_LOG(cb, "%s sgl[%u].lkey 0x%x length %u addr 0x%llx\n",
+			  __func__, scnt, sgl[scnt].lkey, sgl[scnt].length,
+			  sgl[scnt].addr);
+
+		fr[scnt].opcode = IB_WR_FAST_REG_MR;
+		fr[scnt].wr_id = scnt;
+		fr[scnt].send_flags = 0;
+		fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+		fr[scnt].wr.fast_reg.length = cb->size;
+		fr[scnt].wr.fast_reg.page_list = pl[scnt];
+		fr[scnt].wr.fast_reg.page_list_len = plen;
+		fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+		fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+		fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+		fr[scnt].next = &read[scnt];
+		/* Only the read is signaled; its wr_id names the chain to repost. */
+		read[scnt].opcode = IB_WR_RDMA_READ_WITH_INV;
+		read[scnt].wr_id = scnt;
+		read[scnt].send_flags = IB_SEND_SIGNALED;
+		read[scnt].wr.rdma.rkey = cb->remote_rkey;
+		read[scnt].wr.rdma.remote_addr = cb->remote_addr;
+		read[scnt].num_sge = 1;
+		read[scnt].sg_list = &sgl[scnt];
+		ret = ib_post_send(cb->qp, &fr[scnt], &bad);
if (ret) {
- PRINTF(cb, "post send error %d\n", ret);
- break;
+			PRINTF(cb, "ib_post_send failed %d\n", ret);
+			goto err2;
}
+	}
- /* Wait for the server to say the RDMA Write is complete. */
- wait_event_interruptible(cb->sem,
- cb->state >= RDMA_WRITE_COMPLETE);
- if (cb->state != RDMA_WRITE_COMPLETE) {
- PRINTF(cb,
- "wait for RDMA_WRITE_COMPLETE state %d\n",
- cb->state);
+	start = time_uptime;
+	DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+	while (!cb->count || cb->server || count < cb->count) {
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+				  count);
+			wait_event_interruptible_timeout(cb->sem,
+							 cb->state == ERROR,
+							 1);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}
+		/* Drain whatever has completed, reposting each finished chain. */
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n",
+				       ret);
+				goto err2;
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb,
+					       "completion error %u wr_id %lld "
+					       "opcode %d\n", wc.status,
+					       wc.wr_id, wc.opcode);
+					goto err2;
+				}
+				count++;
+				if (count == cb->count)
+					break;
+				/* New key byte so the reposted fastreg uses a fresh rkey. */
+				ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+				fr[wc.wr_id].wr.fast_reg.rkey =
+					mr[wc.wr_id]->rkey;
+				sgl[wc.wr_id].lkey = mr[wc.wr_id]->rkey;
+				ret = ib_post_send(cb->qp, &fr[wc.wr_id], &bad);
+				if (ret) {
+					PRINTF(cb,
+					       "ib_post_send failed %d\n", ret);
+					goto err2;
+				}
+			} else if (krping_sigpending()) {
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
break;
}
-
- if (cb->validate)
- if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
- PRINTF(cb, "data mismatch!\n");
- break;
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u "
+				       "opcode %u\n", wc.status, wc.opcode);
}
+		}
+	} while (ret == 1);
- if (cb->verbose) {
- if (strlen(cb->rdma_buf) > 128) {
- char msgbuf[128];
-
- strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
- PRINTF(cb, "ping data stripped: %s\n",
- msgbuf);
- } else
- PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+	DEBUG_LOG(cb, "destroying fr mrs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (mr[scnt]) {
+			ib_dereg_mr(mr[scnt]);
+			DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (buf[scnt]) {
+			dma_unmap_single(cb->pd->device->dma_device,
+					 dma_addr[scnt], cb->size,
+					 DMA_BIDIRECTIONAL);
+			kfree(buf[scnt]);
+			DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "destroying fr page lists!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (pl[scnt]) {
+			DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+			ib_free_fast_reg_page_list(pl[scnt]);
}
-#ifdef SLOW_KRPING
- wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
}
+err1:
+	if (pl)
+		kfree(pl);
+	if (mr)
+		kfree(mr);
+	if (fr)
+		kfree(fr);
+	if (read)
+		kfree(read);
+	if (sgl)
+		kfree(sgl);
+	if (buf)
+		kfree(buf);
+	if (dma_addr)
+		kfree(dma_addr);
+}
+/*
+ * Passive server side of a fastreg test (krping_run_server calls this for
+ * fr tests 1-4): log, then sleep interruptibly until cb->state == ERROR.
+ */
+static void krping_fr_test_server(struct krping_cb *cb)
+{
+ DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static void krping_rlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_server(struct krping_cb *cb)
{
struct ib_send_wr *bad_wr;
struct ib_wc wc;
int ret;
- cb->state = RDMA_READ_ADV;
+ /* Spin waiting for client's Start STAG/TO/Len */
+ while (cb->state < RDMA_READ_ADV) {
+ krping_cq_event_handler(cb->cq, cb);
+ }
+ DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+ cb->remote_rkey, cb->remote_addr);
/* Send STAG/TO/Len to client */
krping_format_send(cb, cb->start_dma_addr);
- if (cb->state == ERROR) {
- PRINTF(cb, "krping_format_send failed\n");
- return;
- }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
PRINTF(cb, "post send error %d\n", ret);
@@ -1775,84 +1895,31 @@
return;
}
if (wc.status) {
- PRINTF(cb, "send completion error %d\n", wc.status);
+ PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
- /* Spin waiting for server's Start STAG/TO/Len */
- while (cb->state < RDMA_WRITE_ADV) {
- krping_cq_event_handler(cb->cq, cb);
- }
-
-#if 0
-{
- int i;
- struct timeval start, stop;
- time_t sec;
- suseconds_t usec;
- unsigned long long elapsed;
- struct ib_wc wc;
- struct ib_send_wr *bad_wr;
- int ne;
-
- cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
- cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
- cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
- cb->rdma_sq_wr.sg_list->length = 0;
- cb->rdma_sq_wr.num_sge = 0;
-
- microtime(&start);
- for (i=0; i < 100000; i++) {
- if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
- PRINTF(cb, "Couldn't post send\n");
- return;
- }
- do {
- ne = ib_poll_cq(cb->cq, 1, &wc);
- } while (ne == 0);
- if (ne < 0) {
- PRINTF(cb, "poll CQ failed %d\n", ne);
- return;
- }
- if (wc.status != IB_WC_SUCCESS) {
- PRINTF(cb, "Completion wth error at %s:\n",
- cb->server ? "server" : "client");
- PRINTF(cb, "Failed status %d: wr_id %d\n",
- wc.status, (int) wc.wr_id);
- return;
- }
- }
- microtime(&stop);
-
- if (stop.tv_usec < start.tv_usec) {
- stop.tv_usec += 1000000;
- stop.tv_sec -= 1;
- }
- sec = stop.tv_sec - start.tv_sec;
- usec = stop.tv_usec - start.tv_usec;
- elapsed = sec * 1000000 + usec;
- PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
-}
-#endif
-
- rlat_test(cb);
+ if (cb->duplex)
+ krping_fr_test5(cb);
+ DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
-static void krping_wlat_test_client(struct krping_cb *cb)
+static void krping_fr_test5_client(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+ struct ib_send_wr *bad;
struct ib_wc wc;
int ret;
cb->state = RDMA_READ_ADV;
- /* Send STAG/TO/Len to client */
+ /* Send STAG/TO/Len to server */
krping_format_send(cb, cb->start_dma_addr);
if (cb->state == ERROR) {
PRINTF(cb, "krping_format_send failed\n");
return;
}
- ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+ ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
if (ret) {
PRINTF(cb, "post send error %d\n", ret);
return;
@@ -1873,15 +1940,619 @@
while (cb->state < RDMA_WRITE_ADV) {
krping_cq_event_handler(cb->cq, cb);
}
+ DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
- wlat_test(cb);
+ return krping_fr_test5(cb);
}
-static void krping_bw_test_client(struct krping_cb *cb)
+/*
+ * Fastreg test 6: post (txdepth / 3) chains of RDMA WRITE + FAST_REG_MR +
+ * signaled LOCAL_INV, reposting each chain as its invalidate completes.
+ * The loop runs while cb->count is 0, cb->server is set, or fewer than
+ * cb->count completions have been polled; it stops early on a
+ * poll/post/completion error, a pending signal, or cb->state == ERROR.
+ * NOTE: every 9 seconds we sleep for 1 tick to keep the kernel happy.
+ * If a count is given, then the last IO will have a bogus lkey in the
+ * write work request.  This reproduces a fw bug where the connection
+ * will get stuck if a fastreg is processed while the ulptx is failing
+ * the bad write.
+ */
+static void krping_fr_test6(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+	struct ib_fast_reg_page_list **pl;
+	struct ib_send_wr *fr, *write, *inv, *bad;
struct ib_wc wc;
- int ret;
+	struct ib_sge *sgl;
+	u8 key = 0;
+	struct ib_mr **mr;
+	u8 **buf;
+	dma_addr_t *dma_addr;
+	int i;
+	int ret;
+	int plen = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	unsigned long start;
+	int count = 0;
+	int scnt;
+	int depth = cb->txdepth / 3;	/* three WRs (write + fastreg + inv) per chain */
+
+	if (!depth) {
+		PRINTF(cb, "txdepth must be > 3 for this test!\n");
+		return;
+	}
+
+	/* Per-chain bookkeeping arrays; zeroed so cleanup can test for NULL. */
+	pl = kzalloc(sizeof *pl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s pl %p size %lu\n", __func__, pl, sizeof *pl * depth);
+
+	mr = kzalloc(sizeof *mr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s mr %p size %lu\n", __func__, mr, sizeof *mr * depth);
+
+	fr = kzalloc(sizeof *fr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s fr %p size %lu\n", __func__, fr, sizeof *fr * depth);
+
+	sgl = kzalloc(sizeof *sgl * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s sgl %p size %lu\n", __func__, sgl, sizeof *sgl * depth);
+
+	write = kzalloc(sizeof *write * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s write %p size %lu\n", __func__, write, sizeof *write * depth);
+
+	inv = kzalloc(sizeof *inv * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s inv %p size %lu\n", __func__, inv, sizeof *inv * depth);
+
+	buf = kzalloc(sizeof *buf * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s buf %p size %lu\n", __func__, buf, sizeof *buf * depth);
+
+	dma_addr = kzalloc(sizeof *dma_addr * depth, GFP_KERNEL);
+	DEBUG_LOG(cb, "%s dma_addr %p size %lu\n", __func__, dma_addr, sizeof *dma_addr * depth);
+
+	/* inv must be checked too: it is dereferenced in the setup loop. */
+	if (!pl || !mr || !fr || !write || !inv || !sgl || !buf || !dma_addr) {
+		PRINTF(cb, "kzalloc failed\n");
+		goto err1;
+	}
+
+	/* Build and post one write + fastreg + inv chain per slot. */
+	for (scnt = 0; scnt < depth; scnt++) {
+		pl[scnt] = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+		if (IS_ERR(pl[scnt])) {
+			PRINTF(cb, "alloc_fr_page_list failed %ld\n",
+			       PTR_ERR(pl[scnt]));
+			/* Cleanup tests for non-NULL; never hand it an ERR_PTR. */
+			pl[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s pl[%u] %p\n", __func__, scnt, pl[scnt]);
+
+		mr[scnt] = ib_alloc_fast_reg_mr(cb->pd, plen);
+		if (IS_ERR(mr[scnt])) {
+			PRINTF(cb, "alloc_fr failed %ld\n",
+			       PTR_ERR(mr[scnt]));
+			/* Cleanup tests for non-NULL; never hand it an ERR_PTR. */
+			mr[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s mr[%u] %p\n", __func__, scnt, mr[scnt]);
+		ib_update_fast_reg_key(mr[scnt], ++key);
+
+		buf[scnt] = kmalloc(cb->size, GFP_KERNEL);
+		if (!buf[scnt]) {
+			PRINTF(cb, "kmalloc failed\n");
+			ret = -ENOMEM;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s buf[%u] %p\n", __func__, scnt, buf[scnt]);
+		dma_addr[scnt] = dma_map_single(cb->pd->device->dma_device,
+						buf[scnt], cb->size,
+						DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(cb->pd->device->dma_device,
+		    dma_addr[scnt])) {
+			PRINTF(cb, "dma_map failed\n");
+			ret = -ENOMEM;
+			/*
+			 * Release the unmapped buffer here so the cleanup loop
+			 * does not dma_unmap_single() an invalid handle.
+			 */
+			kfree(buf[scnt]);
+			buf[scnt] = NULL;
+			goto err2;
+		}
+		DEBUG_LOG(cb, "%s dma_addr[%u] %p\n", __func__, scnt, (void *)dma_addr[scnt]);
+		for (i=0; i<plen; i++) {
+			pl[scnt]->page_list[i] = ((unsigned long)dma_addr[scnt] & PAGE_MASK) + (i * PAGE_SIZE);
+			DEBUG_LOG(cb, "%s pl[%u]->page_list[%u] 0x%llx\n",
+				  __func__, scnt, i, pl[scnt]->page_list[i]);
+		}
+
+		/* Every write shares cb->rdma_sgl as its single SGE. */
+		write[scnt].opcode = IB_WR_RDMA_WRITE;
+		write[scnt].wr_id = scnt;
+		write[scnt].wr.rdma.rkey = cb->remote_rkey;
+		write[scnt].wr.rdma.remote_addr = cb->remote_addr;
+		write[scnt].num_sge = 1;
+		write[scnt].sg_list = &cb->rdma_sgl;
+		write[scnt].sg_list->length = cb->size;
+		write[scnt].next = &fr[scnt];
+
+		fr[scnt].opcode = IB_WR_FAST_REG_MR;
+		fr[scnt].wr_id = scnt;
+		fr[scnt].wr.fast_reg.page_shift = PAGE_SHIFT;
+		fr[scnt].wr.fast_reg.length = cb->size;
+		fr[scnt].wr.fast_reg.page_list = pl[scnt];
+		fr[scnt].wr.fast_reg.page_list_len = plen;
+		fr[scnt].wr.fast_reg.iova_start = (u64)buf[scnt];
+		fr[scnt].wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+		fr[scnt].wr.fast_reg.rkey = mr[scnt]->rkey;
+		fr[scnt].next = &inv[scnt];
+
+		inv[scnt].opcode = IB_WR_LOCAL_INV;
+		/*
+		 * inv is the only signaled WR in the chain, so its wr_id is
+		 * what the completion loop uses to pick the chain to repost.
+		 */
+		inv[scnt].wr_id = scnt;
+		inv[scnt].send_flags = IB_SEND_SIGNALED;
+		inv[scnt].ex.invalidate_rkey = mr[scnt]->rkey;
+
+		ret = ib_post_send(cb->qp, &write[scnt], &bad);
+		if (ret) {
+			PRINTF(cb, "ib_post_send failed %d\n", ret);
+			goto err2;
+		}
+	}
+
+	start = time_uptime;
+	DEBUG_LOG(cb, "%s starting IO.\n", __func__);
+	while (!cb->count || cb->server || count < cb->count) {
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "%s pausing 1 tick! count %u\n", __func__,
+				  count);
+			wait_event_interruptible_timeout(cb->sem,
+							 cb->state == ERROR,
+							 1);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}
+		/* Drain whatever has completed, reposting each finished chain. */
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n",
+				       ret);
+				goto err2;
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb,
+					       "completion error %u wr_id %lld "
+					       "opcode %d\n", wc.status,
+					       wc.wr_id, wc.opcode);
+					goto err2;
+				}
+				count++;
+				/* Poison the shared lkey so the final write fails. */
+				if (count == (cb->count -1))
+					cb->rdma_sgl.lkey = 0x00dead;
+				if (count == cb->count)
+					break;
+				/* New key byte so the reposted fastreg uses a fresh rkey. */
+				ib_update_fast_reg_key(mr[wc.wr_id], ++key);
+				fr[wc.wr_id].wr.fast_reg.rkey =
+					mr[wc.wr_id]->rkey;
+				inv[wc.wr_id].ex.invalidate_rkey =
+					mr[wc.wr_id]->rkey;
+				ret = ib_post_send(cb->qp, &write[wc.wr_id], &bad);
+				if (ret) {
+					PRINTF(cb,
+					       "ib_post_send failed %d\n", ret);
+					goto err2;
+				}
+			} else if (krping_sigpending()){
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+err2:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u "
+				       "opcode %u\n", wc.status, wc.opcode);
+			}
+		}
+	} while (ret == 1);
+
+	DEBUG_LOG(cb, "destroying fr mrs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (mr[scnt]) {
+			ib_dereg_mr(mr[scnt]);
+			DEBUG_LOG(cb, "%s dereg mr %p\n", __func__, mr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "unmapping/freeing bufs!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (buf[scnt]) {
+			dma_unmap_single(cb->pd->device->dma_device,
+					 dma_addr[scnt], cb->size,
+					 DMA_BIDIRECTIONAL);
+			kfree(buf[scnt]);
+			DEBUG_LOG(cb, "%s unmap/free buf %p dma_addr %p\n", __func__, buf[scnt], (void *)dma_addr[scnt]);
+		}
+	}
+	DEBUG_LOG(cb, "destroying fr page lists!\n");
+	for (scnt = 0; scnt < depth; scnt++) {
+		if (pl[scnt]) {
+			DEBUG_LOG(cb, "%s free pl %p\n", __func__, pl[scnt]);
+			ib_free_fast_reg_page_list(pl[scnt]);
+		}
+	}
+err1:
+	if (pl)
+		kfree(pl);
+	if (mr)
+		kfree(mr);
+	if (fr)
+		kfree(fr);
+	if (write)
+		kfree(write);
+	if (inv)
+		kfree(inv);
+	if (sgl)
+		kfree(sgl);
+	if (buf)
+		kfree(buf);
+	if (dma_addr)
+		kfree(dma_addr);
+}
+
+/*
+ * Server side of fastreg test 6: spin until the client's advertisement moves
+ * cb->state to RDMA_READ_ADV or beyond, send our own buffer advertisement,
+ * run krping_fr_test6() as well when cb->duplex is set, then sleep until
+ * cb->state == ERROR.
+ */
+static void krping_fr_test6_server(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	/* Spin waiting for client's Start STAG/TO/Len */
+	while (cb->state < RDMA_READ_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s client STAG %x TO 0x%llx\n", __func__,
+		  cb->remote_rkey, cb->remote_addr);
+
+	/* Send STAG/TO/Len to client */
+	krping_format_send(cb, cb->start_dma_addr);
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment is parenthesized
+	 * so ret holds ib_poll_cq()'s return value (not the result of the
+	 * comparison) and the error check below can actually fire.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completiong error %d\n", wc.status);
+		return;
+	}
+
+	if (cb->duplex)
+		krping_fr_test6(cb);
+	DEBUG_LOG(cb, "%s waiting for disconnect...\n", __func__);
+	wait_event_interruptible(cb->sem, cb->state == ERROR);
+}
+
+/*
+ * Client side of fastreg test 6: advertise our start buffer to the server,
+ * wait for the send to complete, spin until the server's advertisement moves
+ * cb->state to RDMA_WRITE_ADV or beyond, then run krping_fr_test6().
+ */
+static void krping_fr_test6_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment is parenthesized
+	 * so ret holds ib_poll_cq()'s return value (not the result of the
+	 * comparison) and the error check below can actually fire.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+	DEBUG_LOG(cb, "%s server STAG %x TO 0x%llx\n", __func__, cb->remote_rkey, cb->remote_addr);
+
+	/* Plain call: a void function must not return an expression. */
+	krping_fr_test6(cb);
+}
+
+/*
+ * Server entry point: bind, set up the QP and buffers on the child cm_id,
+ * post the first receive, accept the connection, and hand off to the test
+ * selected by the cb flags.  Resources are unwound in reverse order of
+ * acquisition on every exit after a successful bind.
+ */
+static void krping_run_server(struct krping_cb *cb)
+{
+	struct ib_recv_wr *bad_recv;
+	int rc;
+
+	if (krping_bind_server(cb) != 0)
+		return;
+
+	rc = krping_setup_qp(cb, cb->child_cm_id);
+	if (rc != 0) {
+		PRINTF(cb, "setup_qp failed: %d\n", rc);
+		goto destroy_id;
+	}
+
+	rc = krping_setup_buffers(cb);
+	if (rc != 0) {
+		PRINTF(cb, "krping_setup_buffers failed: %d\n", rc);
+		goto free_qp;
+	}
+
+	rc = ib_post_recv(cb->qp, &cb->rq_wr, &bad_recv);
+	if (rc != 0) {
+		PRINTF(cb, "ib_post_recv failed: %d\n", rc);
+		goto free_buffers;
+	}
+
+	rc = krping_accept(cb);
+	if (rc != 0) {
+		PRINTF(cb, "connect error %d\n", rc);
+		goto free_buffers;
+	}
+
+	/* Latency and bandwidth modes take precedence over the fastreg tests. */
+	if (cb->wlat) {
+		krping_wlat_test_server(cb);
+	} else if (cb->rlat) {
+		krping_rlat_test_server(cb);
+	} else if (cb->bw) {
+		krping_bw_test_server(cb);
+	} else if (!cb->frtest) {
+		krping_test_server(cb);
+	} else if (cb->testnum >= 1 && cb->testnum <= 4) {
+		krping_fr_test_server(cb);
+	} else if (cb->testnum == 5) {
+		krping_fr_test5_server(cb);
+	} else if (cb->testnum == 6) {
+		krping_fr_test6_server(cb);
+	} else {
+		/* An unknown test number skips the explicit disconnect. */
+		PRINTF(cb, "unknown fr test %d\n", cb->testnum);
+		goto free_buffers;
+	}
+	rdma_disconnect(cb->child_cm_id);
+free_buffers:
+	krping_free_buffers(cb);
+free_qp:
+	krping_free_qp(cb);
+destroy_id:
+	rdma_destroy_id(cb->child_cm_id);
+}
+
+/*
+ * Default client ping loop.  Each iteration fills the start buffer with a
+ * "rdma-ping-N: " header followed by a rotating run of character codes
+ * 65..122, advertises the start buffer and then the rdma buffer to the
+ * server, waits for the matching state transitions, and optionally validates
+ * and prints the data.  Runs cb->count times, or forever when cb->count is 0,
+ * and stops at the first failure.
+ */
+static void krping_test_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_send;
+	unsigned char ch;
+	int iter = 0;
+	int seed = 65;
+	int hdr_len, pos, rc;
+
+	while (cb->count == 0 || iter < cb->count) {
+		cb->state = RDMA_READ_ADV;
+
+		/* Put some ascii text in the buffer. */
+		hdr_len = sprintf(cb->start_buf, "rdma-ping-%d: ", iter);
+		ch = seed;
+		for (pos = hdr_len; pos < cb->size; pos++) {
+			cb->start_buf[pos] = ch;
+			if (++ch > 122)
+				ch = 65;
+		}
+		if (++seed > 122)
+			seed = 65;
+		cb->start_buf[cb->size - 1] = 0;
+
+		krping_format_send(cb, cb->start_dma_addr);
+		if (cb->state == ERROR) {
+			PRINTF(cb, "krping_format_send failed\n");
+			break;
+		}
+		rc = ib_post_send(cb->qp, &cb->sq_wr, &bad_send);
+		if (rc != 0) {
+			PRINTF(cb, "post send error %d\n", rc);
+			break;
+		}
+
+		/* Wait for server to ACK */
+		wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
+		if (cb->state != RDMA_WRITE_ADV) {
+			PRINTF(cb,
+			       "wait for RDMA_WRITE_ADV state %d\n",
+			       cb->state);
+			break;
+		}
+
+		krping_format_send(cb, cb->rdma_dma_addr);
+		rc = ib_post_send(cb->qp, &cb->sq_wr, &bad_send);
+		if (rc != 0) {
+			PRINTF(cb, "post send error %d\n", rc);
+			break;
+		}
+
+		/* Wait for the server to say the RDMA Write is complete. */
+		wait_event_interruptible(cb->sem,
+					 cb->state >= RDMA_WRITE_COMPLETE);
+		if (cb->state != RDMA_WRITE_COMPLETE) {
+			PRINTF(cb,
+			       "wait for RDMA_WRITE_COMPLETE state %d\n",
+			       cb->state);
+			break;
+		}
+
+		if (cb->validate &&
+		    memcmp(cb->start_buf, cb->rdma_buf, cb->size) != 0) {
+			PRINTF(cb, "data mismatch!\n");
+			break;
+		}
+
+		if (cb->verbose) {
+			if (strlen(cb->rdma_buf) <= 128) {
+				PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+			} else {
+				/* Print only what fits in a 128-byte buffer. */
+				char msgbuf[128];
+
+				strlcpy(msgbuf, cb->rdma_buf, sizeof(msgbuf));
+				PRINTF(cb, "ping data stripped: %s\n",
+				       msgbuf);
+			}
+		}
+#ifdef SLOW_KRPING
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+#endif
+		iter++;
+	}
+}
+
+/*
+ * Client side of the read-latency test: advertise our start buffer to the
+ * server, wait for the send to complete, spin until the server's
+ * advertisement moves cb->state to RDMA_WRITE_ADV or beyond, then run
+ * rlat_test().
+ */
+static void krping_rlat_test_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment is parenthesized
+	 * so ret holds ib_poll_cq()'s return value (not the result of the
+	 * comparison) and the error check below can actually fire.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+
+#if 0
+{
+	int i;
+	struct timeval start, stop;
+	time_t sec;
+	suseconds_t usec;
+	unsigned long long elapsed;
+	struct ib_wc wc;
+	struct ib_send_wr *bad_wr;
+	int ne;
+
+	cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
+	cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
+	cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
+	cb->rdma_sq_wr.sg_list->length = 0;
+	cb->rdma_sq_wr.num_sge = 0;
+
+	microtime(&start);
+	for (i=0; i < 100000; i++) {
+		if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
+			PRINTF(cb, "Couldn't post send\n");
+			return;
+		}
+		do {
+			ne = ib_poll_cq(cb->cq, 1, &wc);
+		} while (ne == 0);
+		if (ne < 0) {
+			PRINTF(cb, "poll CQ failed %d\n", ne);
+			return;
+		}
+		if (wc.status != IB_WC_SUCCESS) {
+			PRINTF(cb, "Completion wth error at %s:\n",
+				cb->server ? "server" : "client");
+			PRINTF(cb, "Failed status %d: wr_id %d\n",
+				wc.status, (int) wc.wr_id);
+			return;
+		}
+	}
+	microtime(&stop);
+
+	if (stop.tv_usec < start.tv_usec) {
+		stop.tv_usec += 1000000;
+		stop.tv_sec -= 1;
+	}
+	sec = stop.tv_sec - start.tv_sec;
+	usec = stop.tv_usec - start.tv_usec;
+	elapsed = sec * 1000000 + usec;
+	PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
+}
+#endif
+
+	rlat_test(cb);
+}
+
+/*
+ * Client side of the write-latency test: advertise our start buffer to the
+ * server, wait for the send to complete, spin until the server's
+ * advertisement moves cb->state to RDMA_WRITE_ADV or beyond, then run
+ * wlat_test().
+ */
+static void krping_wlat_test_client(struct krping_cb *cb)
+{
+	struct ib_send_wr *bad_wr;
+	struct ib_wc wc;
+	int ret;
+
+	cb->state = RDMA_READ_ADV;
+
+	/* Send STAG/TO/Len to server */
+	krping_format_send(cb, cb->start_dma_addr);
+	if (cb->state == ERROR) {
+		PRINTF(cb, "krping_format_send failed\n");
+		return;
+	}
+	ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
+	if (ret) {
+		PRINTF(cb, "post send error %d\n", ret);
+		return;
+	}
+
+	/*
+	 * Spin waiting for send completion.  The assignment is parenthesized
+	 * so ret holds ib_poll_cq()'s return value (not the result of the
+	 * comparison) and the error check below can actually fire.
+	 */
+	while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 0)
+		;
+	if (ret < 0) {
+		PRINTF(cb, "poll error %d\n", ret);
+		return;
+	}
+	if (wc.status) {
+		PRINTF(cb, "send completion error %d\n", wc.status);
+		return;
+	}
+
+	/* Spin waiting for server's Start STAG/TO/Len */
+	while (cb->state < RDMA_WRITE_ADV) {
+		krping_cq_event_handler(cb->cq, cb);
+	}
+
+	wlat_test(cb);
+}
+
+static void krping_bw_test_client(struct krping_cb *cb)
+{
+ struct ib_send_wr *bad_wr;
+ struct ib_wc wc;
+ int ret;
cb->state = RDMA_READ_ADV;
@@ -1916,7 +2587,212 @@
bw_test(cb);
}
-static void krping_fr_test(struct krping_cb *cb)
+
+/*
+ * fastreg 2 valid different mrs and verify the completions.
+ *
+ * Allocates one page list and two fast-register MRs, then posts a signaled
+ * IB_WR_FAST_REG_MR work request for each MR, reusing the same page list
+ * and WR template.  The CQ is then polled, sleeping up to HZ ticks between
+ * polls, until two completions have been reaped or a signal is pending.
+ * Completion statuses are only logged; nothing checks them here.
+ * Once both MRs exist, every exit path drains the CQ before the MRs and the
+ * page list are released.
+ */
+static void krping_fr_test1(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, *bad;
+	struct ib_wc wc;
+	struct ib_mr *mr1, *mr2;
+	int i;
+	int ret;
+	int size = cb->size;
+	/* Page count for size bytes (presumes a Linux-style PAGE_MASK). */
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	int count = 0;	/* completions reaped so far */
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+
+	mr1 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr1)) {
+		/* Report the error carried by mr1, not the valid page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr1));
+		goto err1;
+	}
+	mr2 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr2)) {
+		/* Report the error carried by mr2, not the valid page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr2));
+		goto err2;
+	}
+
+	/* Fill the page list with synthetic, page-aligned addresses. */
+	for (i = 0; i < plen; i++)
+		pl->page_list[i] = i * PAGE_SIZE;
+
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr_id = 1;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.send_flags = IB_SEND_SIGNALED;
+
+	/* First registration: mr1. */
+	fr.wr.fast_reg.rkey = mr1->rkey;
+	DEBUG_LOG(cb, "%s fr1: stag 0x%x plen %u size %u depth %u\n", __func__,
+	    fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+
+	/* Second registration: same WR template, now targeting mr2. */
+	fr.wr.fast_reg.rkey = mr2->rkey;
+	DEBUG_LOG(cb, "%s fr2: stag 0x%x plen %u size %u depth %u\n", __func__,
+	    fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			goto err3;
+		}
+		if (ret == 1) {
+			DEBUG_LOG(cb, "completion status %u wr %s\n",
+			    wc.status, wc.wr_id == 1 ? "fr" : "inv");
+			count++;
+		} else if (krping_sigpending()) {
+			PRINTF(cb, "signal!\n");
+			goto err3;
+		}
+
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	} while (count != 2);
+err3:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			PRINTF(cb, "completion %u opcode %u\n", wc.status, wc.opcode);
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "destroying fr mr2!\n");
+
+	ib_dereg_mr(mr2);
+err2:
+	DEBUG_LOG(cb, "destroying fr mr1!\n");
+	ib_dereg_mr(mr1);
+err1:
+	DEBUG_LOG(cb, "destroying fr page list!\n");
+	ib_free_fast_reg_page_list(pl);
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * fastreg the same mr twice, 2nd one should produce error cqe.
+ *
+ * Allocates a page list and a single fast-register MR, then posts the very
+ * same signaled IB_WR_FAST_REG_MR work request twice.  The CQ is polled,
+ * sleeping up to HZ ticks between polls, until two completions have been
+ * reaped or a signal is pending.  Completion statuses are only logged; the
+ * expected error on the second one is not checked programmatically.
+ * Once the MR exists, every exit path drains the CQ before cleanup.
+ */
+static void krping_fr_test2(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, *bad;
+	struct ib_wc wc;
+	struct ib_mr *mr1;
+	int i;
+	int ret;
+	int size = cb->size;
+	/* Page count for size bytes (presumes a Linux-style PAGE_MASK). */
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	int count = 0;	/* completions reaped so far */
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+
+	mr1 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr1)) {
+		/* Report the error carried by mr1, not the valid page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr1));
+		goto err1;
+	}
+
+	/* Fill the page list with synthetic, page-aligned addresses. */
+	for (i = 0; i < plen; i++)
+		pl->page_list[i] = i * PAGE_SIZE;
+
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr_id = 1;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.send_flags = IB_SEND_SIGNALED;
+	fr.wr.fast_reg.rkey = mr1->rkey;
+
+	/* First registration of mr1. */
+	DEBUG_LOG(cb, "%s fr1: stag 0x%x plen %u size %u depth %u\n", __func__,
+	    fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+
+	/* Deliberately post the identical WR (same rkey) a second time. */
+	DEBUG_LOG(cb, "%s fr2: stag 0x%x plen %u size %u depth %u\n", __func__,
+	    fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			goto err3;
+		}
+		if (ret == 1) {
+			DEBUG_LOG(cb, "completion status %u wr %s\n",
+			    wc.status, wc.wr_id == 1 ? "fr" : "inv");
+			count++;
+		} else if (krping_sigpending()) {
+			PRINTF(cb, "signal!\n");
+			goto err3;
+		}
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	} while (count != 2);
+err3:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			PRINTF(cb, "completion %u opcode %u\n", wc.status, wc.opcode);
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "destroying fr mr1!\n");
+	ib_dereg_mr(mr1);
+err1:
+	DEBUG_LOG(cb, "destroying fr page list!\n");
+	ib_free_fast_reg_page_list(pl);
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * fastreg pipelined in a loop as fast as we can until the user interrupts.
+ * NOTE: every 9 seconds we sleep for 1 second to keep the kernel happy.
+ */
+static void krping_fr_test3(struct krping_cb *cb)
{
struct ib_fast_reg_page_list *pl;
struct ib_send_wr fr, inv, *bad;
@@ -1927,10 +2803,11 @@
int ret;
int size = cb->size;
int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
- time_t start;
+ unsigned long start;
int count = 0;
int scnt = 0;
+
pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
if (IS_ERR(pl)) {
PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
@@ -1944,7 +2821,7 @@
}
for (i=0; i<plen; i++)
- pl->page_list[i] = 0xcafebabe | i;
+ pl->page_list[i] = i * PAGE_SIZE;
memset(&fr, 0, sizeof fr);
fr.opcode = IB_WR_FAST_REG_MR;
@@ -1953,6 +2830,7 @@
fr.wr.fast_reg.page_list = pl;
fr.wr.fast_reg.page_list_len = plen;
fr.wr.fast_reg.iova_start = 0;
+ fr.send_flags = IB_SEND_SIGNALED;
fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
fr.next = &inv;
memset(&inv, 0, sizeof inv);
@@ -1964,7 +2842,7 @@
while (1) {
if ((time_uptime - start) >= 9) {
DEBUG_LOG(cb, "fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen);
- wait_event_interruptible(cb->sem, cb->state == ERROR);
+ wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
if (cb->state == ERROR)
break;
start = time_uptime;
@@ -1984,7 +2862,7 @@
PRINTF(cb, "ib_post_send failed %d\n", ret);
goto err2;
}
- scnt++;
+ scnt+=2;
}
do {
@@ -2008,10 +2886,8 @@
} while (ret == 1);
}
err2:
-#if 0
DEBUG_LOG(cb, "sleeping 1 second\n");
wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
-#endif
DEBUG_LOG(cb, "draining the cq...\n");
do {
ret = ib_poll_cq(cb->cq, 1, &wc);
@@ -2028,7 +2904,128 @@
DEBUG_LOG(cb, "fr_test: done!\n");
ib_dereg_mr(mr);
err1:
+ DEBUG_LOG(cb, "destroying fr page list!\n");
+ ib_free_fast_reg_page_list(pl);
+ DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * fastreg 1 and invalidate 1 mr and verify completion.
+ *
+ * Allocates a page list and one fast-register MR, then posts a two-entry
+ * chain in a single ib_post_send() call: a signaled IB_WR_FAST_REG_MR
+ * followed by an IB_WR_LOCAL_INV for the same rkey.  The invalidate WR
+ * leaves send_flags zeroed, and the poll loop waits for exactly one
+ * completion, sleeping up to HZ ticks between polls.  The completion status
+ * is only logged.  Once the MR exists, every exit path drains the CQ before
+ * cleanup.
+ */
+static void krping_fr_test4(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, inv, *bad;
+	struct ib_wc wc;
+	struct ib_mr *mr1;
+	int i;
+	int ret;
+	int size = cb->size;
+	/* Page count for size bytes (presumes a Linux-style PAGE_MASK). */
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	int count = 0;	/* completions reaped so far */
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+
+	mr1 = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr1)) {
+		/* Report the error carried by mr1, not the valid page list. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr1));
+		goto err1;
+	}
+
+	/* Fill the page list with synthetic, page-aligned addresses. */
+	for (i = 0; i < plen; i++)
+		pl->page_list[i] = i * PAGE_SIZE;
+
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr_id = 1;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.send_flags = IB_SEND_SIGNALED;
+	fr.wr.fast_reg.rkey = mr1->rkey;
+	/* Chain the local invalidate directly behind the registration. */
+	fr.next = &inv;
+	memset(&inv, 0, sizeof inv);
+	inv.opcode = IB_WR_LOCAL_INV;
+	inv.ex.invalidate_rkey = mr1->rkey;
+
+	DEBUG_LOG(cb, "%s fr1: stag 0x%x plen %u size %u depth %u\n", __func__,
+	    fr.wr.fast_reg.rkey, plen, cb->size, cb->txdepth);
+	ret = ib_post_send(cb->qp, &fr, &bad);
+	if (ret) {
+		PRINTF(cb, "ib_post_send failed %d\n", ret);
+		goto err3;
+	}
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			goto err3;
+		}
+		if (ret == 1) {
+			DEBUG_LOG(cb, "completion status %u wr %s\n",
+			    wc.status, wc.wr_id == 1 ? "fr" : "inv");
+			count++;
+		} else if (krping_sigpending()) {
+			PRINTF(cb, "signal!\n");
+			goto err3;
+		}
+		wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	} while (count != 1);
+err3:
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			PRINTF(cb, "completion %u opcode %u\n", wc.status, wc.opcode);
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "destroying fr mr1!\n");
+	ib_dereg_mr(mr1);
+err1:
+	DEBUG_LOG(cb, "destroying fr page list!\n");
ib_free_fast_reg_page_list(pl);
+	DEBUG_LOG(cb, "%s done!\n", __func__);
+}
+
+/*
+ * Run the fast-register test selected by cb->testnum.
+ *
+ * Values 1-4 map to krping_fr_test1() .. krping_fr_test4(); 5 and 6 map to
+ * the krping_fr_test5_client() / krping_fr_test6_client() variants.  Any
+ * other value is reported with PRINTF and no test is run.
+ */
+static void krping_fr_test(struct krping_cb *cb)
+{
+	switch (cb->testnum) {
+	case 1:
+		krping_fr_test1(cb);
+		break;
+	case 2:
+		krping_fr_test2(cb);
+		break;
+	case 3:
+		krping_fr_test3(cb);
+		break;
+	case 4:
+		krping_fr_test4(cb);
+		break;
+	case 5:
+		krping_fr_test5_client(cb);
+		break;
+	case 6:
+		krping_fr_test6_client(cb);
+		break;
+	default:
+		/* testnum is a signed int, so print it with %d. */
+		PRINTF(cb, "Unknown frtest num %d\n", cb->testnum);
+		break;
+	}
}
static int krping_connect_client(struct krping_cb *cb)
@@ -2083,7 +3080,7 @@
return -EINTR;
}
- if (cb->mem == FASTREG && !fastreg_supported(cb))
+ if (cb->mem == FASTREG && !fastreg_supported(cb, 0))
return -EINVAL;
DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n");
@@ -2266,6 +3263,7 @@
break;
case 'f':
cb->frtest = 1;
+ cb->testnum = optint;
DEBUG_LOG(cb, "fast-reg test!\n");
break;
default:
@@ -2283,18 +3281,11 @@
goto out;
}
- if (cb->server && cb->frtest) {
- PRINTF(cb, "must be client to run frtest\n");
- ret = -EINVAL;
- goto out;
- }
-
if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) {
PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n");
ret = -EINVAL;
goto out;
}
-
if (cb->server_invalidate && cb->mem != FASTREG) {
PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n");
ret = -EINVAL;
@@ -2307,7 +3298,7 @@
goto out;
}
- if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) {
+ if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw || cb->frtest)) {
PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n");
ret = -EINVAL;
goto out;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Apr 10, 11:19 AM (1 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31223452
Default Alt Text
D5777.id.diff (46 KB)
Attached To
Mode
D5777: Add fastreg support to krping.
Attached
Detach File
Event Timeline
Log In to Comment