Changeset View
Changeset View
Standalone View
Standalone View
sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c
Show First 20 Lines • Show All 257 Lines • ▼ Show 20 Lines | if (qp->ibqp.qp_type == IB_QPT_UD) { | ||||
memset(dgram, 0, sizeof *dgram); | memset(dgram, 0, sizeof *dgram); | ||||
av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); | av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); | ||||
s += sizeof(struct mlx4_wqe_datagram_seg); | s += sizeof(struct mlx4_wqe_datagram_seg); | ||||
} | } | ||||
/* Pad the remainder of the WQE with an inline data segment. */ | /* Pad the remainder of the WQE with an inline data segment. */ | ||||
if (size > s) { | if (size > s) { | ||||
inl = wqe + s; | inl = wqe + s; | ||||
inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); | inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl)); | ||||
} | } | ||||
ctrl->srcrb_flags = 0; | ctrl->srcrb_flags = 0; | ||||
ctrl->fence_size = size / 16; | ctrl->fence_size = size / 16; | ||||
/* | /* | ||||
* Make sure descriptor is fully written before setting ownership bit | * Make sure descriptor is fully written before setting ownership bit | ||||
* (because HW can start executing as soon as we do). | * (because HW can start executing as soon as we do). | ||||
*/ | */ | ||||
wmb(); | wmb(); | ||||
ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | | ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | | ||||
(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); | (n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0); | ||||
stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); | stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); | ||||
} | } | ||||
/* Post NOP WQE to prevent wrap-around in the middle of WR */ | /* Post NOP WQE to prevent wrap-around in the middle of WR */ | ||||
static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) | static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) | ||||
{ | { | ||||
unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); | unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); | ||||
▲ Show 20 Lines • Show All 1,701 Lines • ▼ Show 20 Lines | static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, | ||||
* processing stale work requests. | * processing stale work requests. | ||||
*/ | */ | ||||
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { | if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { | ||||
struct mlx4_wqe_ctrl_seg *ctrl; | struct mlx4_wqe_ctrl_seg *ctrl; | ||||
int i; | int i; | ||||
for (i = 0; i < qp->sq.wqe_cnt; ++i) { | for (i = 0; i < qp->sq.wqe_cnt; ++i) { | ||||
ctrl = get_send_wqe(qp, i); | ctrl = get_send_wqe(qp, i); | ||||
ctrl->owner_opcode = cpu_to_be32(1 << 31); | ctrl->owner_opcode = cpu_to_be32(1U << 31); | ||||
if (qp->sq_max_wqes_per_wr == 1) | if (qp->sq_max_wqes_per_wr == 1) | ||||
ctrl->fence_size = | ctrl->fence_size = | ||||
1 << (qp->sq.wqe_shift - 4); | 1 << (qp->sq.wqe_shift - 4); | ||||
stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); | stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 644 Lines • ▼ Show 20 Lines | static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, | ||||
* Inline data segments may not cross a 64 byte boundary. If | * Inline data segments may not cross a 64 byte boundary. If | ||||
* our UD header is bigger than the space available up to the | * our UD header is bigger than the space available up to the | ||||
* next 64 byte boundary in the WQE, use two inline data | * next 64 byte boundary in the WQE, use two inline data | ||||
* segments to hold the UD header. | * segments to hold the UD header. | ||||
*/ | */ | ||||
spc = MLX4_INLINE_ALIGN - | spc = MLX4_INLINE_ALIGN - | ||||
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); | ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); | ||||
if (header_size <= spc) { | if (header_size <= spc) { | ||||
inl->byte_count = cpu_to_be32(1 << 31 | header_size); | inl->byte_count = cpu_to_be32(1U << 31 | header_size); | ||||
memcpy(inl + 1, sqp->header_buf, header_size); | memcpy(inl + 1, sqp->header_buf, header_size); | ||||
i = 1; | i = 1; | ||||
} else { | } else { | ||||
inl->byte_count = cpu_to_be32(1 << 31 | spc); | inl->byte_count = cpu_to_be32(1U << 31 | spc); | ||||
memcpy(inl + 1, sqp->header_buf, spc); | memcpy(inl + 1, sqp->header_buf, spc); | ||||
inl = (void *) (inl + 1) + spc; | inl = (void *) (inl + 1) + spc; | ||||
memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); | memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); | ||||
/* | /* | ||||
* Need a barrier here to make sure all the data is | * Need a barrier here to make sure all the data is | ||||
* visible before the byte_count field is set. | * visible before the byte_count field is set. | ||||
* Otherwise the HCA prefetcher could grab the 64-byte | * Otherwise the HCA prefetcher could grab the 64-byte | ||||
* chunk with this inline segment and get a valid (!= | * chunk with this inline segment and get a valid (!= | ||||
* 0xffffffff) byte count but stale data, and end up | * 0xffffffff) byte count but stale data, and end up | ||||
* generating a packet with bad headers. | * generating a packet with bad headers. | ||||
* | * | ||||
* The first inline segment's byte_count field doesn't | * The first inline segment's byte_count field doesn't | ||||
* need a barrier, because it comes after a | * need a barrier, because it comes after a | ||||
* control/MLX segment and therefore is at an offset | * control/MLX segment and therefore is at an offset | ||||
* of 16 mod 64. | * of 16 mod 64. | ||||
*/ | */ | ||||
wmb(); | wmb(); | ||||
inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); | inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc)); | ||||
i = 2; | i = 2; | ||||
} | } | ||||
*mlx_seg_len = | *mlx_seg_len = | ||||
ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); | ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); | ||||
return 0; | return 0; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 520 Lines • ▼ Show 20 Lines | for (nreq = 0; wr; ++nreq, wr = wr->next) { | ||||
if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { | if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { | ||||
*bad_wr = wr; | *bad_wr = wr; | ||||
err = -EINVAL; | err = -EINVAL; | ||||
goto out; | goto out; | ||||
} | } | ||||
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | | ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | | ||||
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh; | ||||
stamp = ind + qp->sq_spare_wqes; | stamp = ind + qp->sq_spare_wqes; | ||||
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); | ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); | ||||
/* | /* | ||||
* We can improve latency by not stamping the last | * We can improve latency by not stamping the last | ||||
* send queue WQE until after ringing the doorbell, so | * send queue WQE until after ringing the doorbell, so | ||||
* only stamp here if there are still more WQEs to post. | * only stamp here if there are still more WQEs to post. | ||||
▲ Show 20 Lines • Show All 306 Lines • Show Last 20 Lines |