Skip to content

Commit

Permalink
prov/efa: Remove inline write logic for rma inject
Browse files Browse the repository at this point in the history
We previously set the inject size to 0 to prevent the use of inline write,
but fabtests can still issue inject RMA operations for zero-size messages.
Make RMA inject temporarily return -FI_ENOSYS until the firmware
supports inline write.

Signed-off-by: Jessie Yang <jiaxiyan@amazon.com>
  • Loading branch information
jiaxiyan authored and shijin-aws committed Jan 9, 2025
1 parent c8c36e4 commit d0d2725
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 74 deletions.
80 changes: 14 additions & 66 deletions prov/efa/src/efa_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,17 +200,21 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep,
struct efa_conn *conn;
#ifndef _WIN32
struct ibv_sge sge_list[msg->iov_count];
struct ibv_data_buf inline_data_list[msg->iov_count];
#else
/* MSVC compiler does not support array declarations with runtime size, so hardcode
* the expected iov_limit/max_sq_sge from the lower-level efa provider.
*/
struct ibv_sge sge_list[EFA_DEV_ATTR_MAX_WR_SGE];
struct ibv_data_buf inline_data_list[EFA_DEV_ATTR_MAX_WR_SGE];
#endif
size_t len;
int i, err = 0;

if (flags & FI_INJECT) {
EFA_WARN(FI_LOG_EP_DATA,
"FI_INJECT is not supported by efa rma yet.\n");
return -FI_ENOSYS;
}

efa_tracepoint(write_begin_msg_context, (size_t) msg->context, (size_t) msg->addr);

qp = base_ep->qp;
Expand All @@ -230,24 +234,13 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep,
ibv_wr_rdma_write(qp->ibv_qp_ex, msg->rma_iov[0].key, msg->rma_iov[0].addr);
}

len = ofi_total_iov_len(msg->msg_iov, msg->iov_count);
if (len <= base_ep->domain->device->efa_attr.inline_buf_size &&
len <= base_ep->inject_rma_size &&
(!msg->desc || !efa_mr_is_hmem(msg->desc[0]))) {
for (i = 0; i < msg->iov_count; i++) {
inline_data_list[i].addr = msg->msg_iov[i].iov_base;
inline_data_list[i].length = msg->msg_iov[i].iov_len;
}
ibv_wr_set_inline_data_list(qp->ibv_qp_ex, msg->iov_count, inline_data_list);
} else {
for (i = 0; i < msg->iov_count; ++i) {
sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base;
sge_list[i].length = msg->msg_iov[i].iov_len;
assert(msg->desc && msg->desc[i]);
sge_list[i].lkey = ((struct efa_mr *)msg->desc[i])->ibv_mr->lkey;
}
ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list);
for (i = 0; i < msg->iov_count; ++i) {
sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base;
sge_list[i].length = msg->msg_iov[i].iov_len;
assert(msg->desc && msg->desc[i]);
sge_list[i].lkey = ((struct efa_mr *)msg->desc[i])->ibv_mr->lkey;
}
ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list);

conn = efa_av_addr_to_conn(base_ep->av, msg->addr);
assert(conn && conn->ep_addr);
Expand Down Expand Up @@ -348,51 +341,6 @@ ssize_t efa_rma_writedata(struct fid_ep *ep_fid, const void *buf, size_t len,
return efa_rma_post_write(base_ep, &msg, FI_REMOTE_CQ_DATA | efa_tx_flags(base_ep));
}

/**
 * @brief Post an RMA write of a single contiguous buffer with FI_INJECT set.
 *
 * Wraps the caller's buffer in a one-element iovec and a one-element RMA
 * iovec, then hands the resulting message to efa_rma_post_write().
 *
 * @param[in] ep_fid     endpoint fid embedded in a struct efa_base_ep
 * @param[in] buf        local source buffer
 * @param[in] len        number of bytes to write; asserted to be no larger
 *                       than the endpoint's inject_rma_size
 * @param[in] dest_addr  destination address handed to the message setup
 * @param[in] addr       remote address placed in the RMA iovec
 * @param[in] key        remote key placed in the RMA iovec
 *
 * @return the non-zero result of efa_rma_check_cap() if the capability
 *         check fails, otherwise whatever efa_rma_post_write() returns.
 */
ssize_t efa_rma_inject_write(struct fid_ep *ep_fid, const void *buf, size_t len,
			     fi_addr_t dest_addr, uint64_t addr, uint64_t key)
{
	struct efa_base_ep *base_ep =
		container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid);
	struct fi_rma_iov remote_iov;
	struct iovec local_iov;
	struct fi_msg_rma rma_msg;
	int ret;

	assert(len <= base_ep->inject_rma_size);

	ret = efa_rma_check_cap(base_ep);
	if (ret != 0)
		return ret;

	/* One local segment, one remote segment; the third macro argument
	 * (NULL) is presumably the descriptor array -- confirm against the
	 * macro definition. */
	EFA_SETUP_IOV(local_iov, buf, len);
	EFA_SETUP_RMA_IOV(remote_iov, addr, len, key);
	EFA_SETUP_MSG_RMA(rma_msg, &local_iov, NULL, 1, dest_addr, &remote_iov,
			  1, NULL, 0);

	return efa_rma_post_write(base_ep, &rma_msg, FI_INJECT);
}

/**
 * @brief Post an RMA write of a single contiguous buffer with both
 *        FI_INJECT and FI_REMOTE_CQ_DATA set.
 *
 * Wraps the caller's buffer in a one-element iovec and a one-element RMA
 * iovec, stores @p data in the message, then hands the message to
 * efa_rma_post_write().
 *
 * @param[in] ep_fid     endpoint fid embedded in a struct efa_base_ep
 * @param[in] buf        local source buffer
 * @param[in] len        number of bytes to write; asserted to be no larger
 *                       than the endpoint's inject_rma_size
 * @param[in] data       value passed as the last message setup argument
 * @param[in] dest_addr  destination address handed to the message setup
 * @param[in] addr       remote address placed in the RMA iovec
 * @param[in] key        remote key placed in the RMA iovec
 *
 * @return the non-zero result of efa_rma_check_cap() if the capability
 *         check fails, otherwise whatever efa_rma_post_write() returns.
 */
ssize_t efa_rma_inject_writedata(struct fid_ep *ep_fid, const void *buf,
				 size_t len, uint64_t data, fi_addr_t dest_addr,
				 uint64_t addr, uint64_t key)
{
	struct efa_base_ep *base_ep =
		container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid);
	struct fi_rma_iov remote_iov;
	struct iovec local_iov;
	struct fi_msg_rma rma_msg;
	int ret;

	assert(len <= base_ep->inject_rma_size);

	ret = efa_rma_check_cap(base_ep);
	if (ret != 0)
		return ret;

	/* One local segment, one remote segment, carrying the caller's data
	 * value in the message. */
	EFA_SETUP_IOV(local_iov, buf, len);
	EFA_SETUP_RMA_IOV(remote_iov, addr, len, key);
	EFA_SETUP_MSG_RMA(rma_msg, &local_iov, NULL, 1, dest_addr, &remote_iov,
			  1, NULL, data);

	return efa_rma_post_write(base_ep, &rma_msg,
				  FI_INJECT | FI_REMOTE_CQ_DATA);
}

struct fi_ops_rma efa_dgram_ep_rma_ops = {
.size = sizeof(struct fi_ops_rma),
.read = fi_no_rma_read,
Expand All @@ -414,7 +362,7 @@ struct fi_ops_rma efa_rma_ops = {
.write = efa_rma_write,
.writev = efa_rma_writev,
.writemsg = efa_rma_writemsg,
.inject = efa_rma_inject_write,
.inject = fi_no_rma_inject,
.writedata = efa_rma_writedata,
.injectdata = efa_rma_inject_writedata,
.injectdata = fi_no_rma_injectdata,
};
39 changes: 31 additions & 8 deletions prov/efa/test/efa_unit_test_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ static void test_efa_rma_prep(struct efa_resource *resource, fi_addr_t *addr)
ibv_qpx->wr_rdma_read = &efa_mock_ibv_wr_rdma_read_save_wr;
ibv_qpx->wr_rdma_write = &efa_mock_ibv_wr_rdma_write_save_wr;
ibv_qpx->wr_rdma_write_imm = &efa_mock_ibv_wr_rdma_write_imm_save_wr;
ibv_qpx->wr_set_inline_data_list =
&efa_mock_ibv_wr_set_inline_data_list_no_op;
ibv_qpx->wr_set_sge_list = &efa_mock_ibv_wr_set_sge_list_no_op;
ibv_qpx->wr_set_ud_addr = &efa_mock_ibv_wr_set_ud_addr_no_op;
ibv_qpx->wr_complete = &efa_mock_ibv_wr_complete_no_op;
Expand Down Expand Up @@ -241,11 +239,9 @@ void test_efa_rma_inject_write(struct efa_resource **state)
test_efa_rma_prep(resource, &dest_addr);
efa_unit_test_buff_construct(&local_buff, resource, 32 /* buff_size */);

assert_int_equal(g_ibv_submitted_wr_id_cnt, 0);
ret = fi_inject_write(resource->ep, local_buff.buff, local_buff.size,
dest_addr, remote_addr, remote_key);
assert_int_equal(ret, 0);
assert_int_equal(g_ibv_submitted_wr_id_cnt, 1);
assert_int_equal(ret, -FI_ENOSYS);

efa_unit_test_buff_destruct(&local_buff);
}
Expand All @@ -262,12 +258,39 @@ void test_efa_rma_inject_writedata(struct efa_resource **state)
test_efa_rma_prep(resource, &dest_addr);
efa_unit_test_buff_construct(&local_buff, resource, 32 /* buff_size */);

assert_int_equal(g_ibv_submitted_wr_id_cnt, 0);
ret = fi_inject_writedata(resource->ep, local_buff.buff,
local_buff.size, 0, dest_addr, remote_addr,
remote_key);
assert_int_equal(ret, 0);
assert_int_equal(g_ibv_submitted_wr_id_cnt, 1);
assert_int_equal(ret, -FI_ENOSYS);

efa_unit_test_buff_destruct(&local_buff);
}

void test_efa_rma_writemsg_with_inject(struct efa_resource **state)
{
struct efa_resource *resource = *state;
struct efa_unit_test_buff local_buff;
struct iovec iov;
struct fi_msg_rma msg = {0};
struct fi_rma_iov rma_iov;
fi_addr_t dest_addr;
void *desc;
int ret;

test_efa_rma_prep(resource, &dest_addr);
efa_unit_test_buff_construct(&local_buff, resource, 4096 /* buff_size */);

iov.iov_base = local_buff.buff;
iov.iov_len = local_buff.size;
desc = fi_mr_desc(local_buff.mr);
rma_iov.len = local_buff.size;
rma_iov.addr = 0x87654321;
rma_iov.key = 123456;
efa_unit_test_construct_msg_rma(&msg, &iov, &desc, 1, dest_addr, &rma_iov,
1, NULL, 0);

ret = fi_writemsg(resource->ep, &msg, FI_INJECT);
assert_int_equal(ret, -FI_ENOSYS);

efa_unit_test_buff_destruct(&local_buff);
}
1 change: 1 addition & 0 deletions prov/efa/test/efa_unit_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ int main(void)
cmocka_unit_test_setup_teardown(test_efa_rma_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_inject_write, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_inject_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_rma_writemsg_with_inject, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_cq_read_send_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_cq_read_recv_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
cmocka_unit_test_setup_teardown(test_efa_cq_read_send_failure, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown),
Expand Down
1 change: 1 addition & 0 deletions prov/efa/test/efa_unit_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ void test_efa_rma_writemsg();
void test_efa_rma_writedata();
void test_efa_rma_inject_write();
void test_efa_rma_inject_writedata();
void test_efa_rma_writemsg_with_inject();
void test_efa_cq_read_send_success();
void test_efa_cq_read_recv_success();
void test_efa_cq_read_send_failure();
Expand Down

0 comments on commit d0d2725

Please sign in to comment.