Select host NIC vendor code at runtime (#261)

Co-authored-by: Aurelien Bouteiller <aurelien.bouteiller@amd.com>
Co-authored-by: Omri Mor <omri50@gmail.com>
Этот коммит содержится в:
Yiltan
2025-09-23 15:33:03 -04:00
коммит произвёл GitHub
родитель 96336da78f
Коммит f4e4ea08a9
18 изменённых файлов: 3307 добавлений и 314 удалений
+16 -19
Просмотреть файл
@@ -48,25 +48,25 @@ find_library(IBVerbs_PROVIDER_LIBRARY
HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS}
PATH_SUFFIXES lib lib64
)
elseif (GDA_BNXT)
find_library(IBVerbs_PROVIDER_LIBRARY
NAMES bnxt_re libbnxt_re
HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS}
PATH_SUFFIXES lib lib64
)
else()
find_library(IBVerbs_PROVIDER_LIBRARY
NAMES mlx5 libmlx5
HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS}
PATH_SUFFIXES lib lib64
)
endif()
find_package_handle_standard_args(IBVerbs DEFAULT_MSG
IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR IBVerbs_PROVIDER_LIBRARY
)
mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR IBVerbs_PROVIDER_LIBRARY)
add_library(IBVerbs::verbs_provider UNKNOWN IMPORTED)
set_target_properties(IBVerbs::verbs_provider PROPERTIES
IMPORTED_LOCATION "${IBVerbs_PROVIDER_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_PROVIDER_INCLUDE_DIR}"
)
target_link_libraries(IBVerbs::verbs IBVerbs::verbs_provider)
endif()
find_package_handle_standard_args(IBVerbs DEFAULT_MSG
IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR
)
mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR)
if (IBVerbs_FOUND)
add_library(IBVerbs::verbs UNKNOWN IMPORTED)
set_target_properties(IBVerbs::verbs PROPERTIES
@@ -74,10 +74,7 @@ set_target_properties(IBVerbs::verbs PROPERTIES
INTERFACE_COMPILE_OPTIONS "${PC_IBVerbs_CFLAGS_OTHER}"
INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_INCLUDE_DIR}"
)
add_library(IBVerbs::verbs_provider UNKNOWN IMPORTED)
set_target_properties(IBVerbs::verbs_provider PROPERTIES
IMPORTED_LOCATION "${IBVerbs_PROVIDER_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_PROVIDER_INCLUDE_DIR}"
)
target_link_libraries(IBVerbs::verbs INTERFACE IBVerbs::verbs_provider)
target_link_libraries(IBVerbs::verbs INTERFACE)
endif()
+2 -7
Просмотреть файл
@@ -35,6 +35,7 @@ target_sources(
gda_team.cpp
queue_pair.cpp
endian.cpp
segment_builder.cpp
topology.cpp
)
@@ -47,10 +48,4 @@ target_link_libraries(
numa
)
if (GDA_BNXT)
add_subdirectory(bnxt)
endif()
if (GDA_MLX5)
target_sources(${PROJECT_NAME} PRIVATE segment_builder.cpp)
endif()
add_subdirectory(bnxt)
+218 -183
Просмотреть файл
@@ -70,29 +70,8 @@ GDABackend::GDABackend(TcpBootstrap *bootstrap): Backend(bootstrap) {
}
void GDABackend::init() {
type = BackendType::GDA_BACKEND;
int ret;
#if defined(GDA_BNXT)
ret = bnxt_dv_dl_init();
if (ret != ROCSHMEM_SUCCESS) {
// Disable BNXT GDA support.
DPRINTF("Initializing rocSHMEM BNXT GDA support failed\n");
// We abort for now, but might remove that once we support
// multiple NIC types in the same build
abort();
}
#endif
#if defined(GDA_MLX5)
ret = mlx5_dv_dl_init();
if (ret != ROCSHMEM_SUCCESS) {
// Disable MLX5 GDA support.
DPRINTF("Initializing rocSHMEM MLX5 GDA support failed\n");
// We abort for now, but might remove that once we support
// multiple NIC types in the same build
abort();
}
#endif
type = BackendType::GDA_BACKEND;
read_env();
@@ -141,14 +120,11 @@ GDABackend::~GDABackend() {
cleanup_heap_memory_rkey();
cleanup_ibv();
#if defined(GDA_BNXT)
if (bnxtdv_handle_ != nullptr)
dlclose(bnxtdv_handle_);
#endif
#if defined(GDA_MLX5)
if (mlx5dv_handle_ != nullptr)
dlclose(mlx5dv_handle_);
#endif
}
void GDABackend::read_env() {
@@ -562,20 +538,20 @@ void GDABackend::rte_barrier() {
}
}
#if defined(GDA_MLX5)
int GDABackend::mlx5_dv_dl_init () {
mlx5dv_handle_ = dlopen("libmlx5.so", RTLD_NOW);
if (!mlx5dv_handle_) {
printf("Could not open libmlx5.so. Returning\n");
DPRINTF("Could not open libmlx5.so. Returning\n");
return ROCSHMEM_ERROR;
}
DLSYM_HELPER(mlx5dv_ftable_, mlx5dv_, mlx5dv_handle_, init_obj);
return ROCSHMEM_SUCCESS;
}
#endif
void GDABackend::setup_ibv() {
autodetect_dv_libs();
open_ib_device();
create_queues();
@@ -592,50 +568,50 @@ void GDABackend::setup_ibv() {
void GDABackend::cleanup_ibv() {
int err;
#ifdef GDA_BNXT
CHECK_HIP(hipHostUnregister(db_region_attr.dbr));
if (gda_vendor == GDAVendor::BNXT) {
CHECK_HIP(hipHostUnregister(db_region_attr.dbr));
for (int i = 0; i < qps.size(); i++) {
err = bnxtdv_ftable_.destroy_qp(qps[i]);
CHECK_ZERO(err, "bnxt_re_dv_destroy_qp");
for (int i = 0; i < qps.size(); i++) {
err = bnxtdv_ftable_.destroy_qp(qps[i]);
CHECK_ZERO(err, "bnxt_re_dv_destroy_qp");
err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.rq_umem_handle);
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)");
err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.rq_umem_handle);
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)");
err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.sq_umem_handle);
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (SQ)");
err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.sq_umem_handle);
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (SQ)");
CHECK_HIP(hipFree(bnxt_qps[i].sq_buf));
CHECK_HIP(hipFree(bnxt_qps[i].rq_buf));
CHECK_HIP(hipFree(bnxt_qps[i].sq_buf));
CHECK_HIP(hipFree(bnxt_qps[i].rq_buf));
err = bnxtdv_ftable_.destroy_cq(cqs[i]);
CHECK_ZERO(err, "bnxt_re_dv_destroy_cq");
err = bnxtdv_ftable_.destroy_cq(cqs[i]);
CHECK_ZERO(err, "bnxt_re_dv_destroy_cq");
err = bnxtdv_ftable_.umem_dereg(bnxt_cqs[i].umem_handle);
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg");
err = bnxtdv_ftable_.umem_dereg(bnxt_cqs[i].umem_handle);
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg");
CHECK_HIP(hipFree(bnxt_cqs[i].buf));
CHECK_HIP(hipFree(bnxt_cqs[i].buf));
}
} else {
for (int i = 0; i < qps.size(); i++) {
err = ibv_destroy_qp(qps[i]);
CHECK_ZERO(err, "ibv_destroy_qp");
err = ibv_destroy_cq(cqs[i]);
CHECK_ZERO(err, "ibv_destroy_cqs");
}
if (gda_vendor == GDAVendor::IONIC) {
err = ibv_dealloc_pd(pd_uxdma[0]);
CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[0])");
err = ibv_dealloc_pd(pd_uxdma[1]);
CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[1])");
}
err = ibv_dealloc_pd(pd_parent);
CHECK_ZERO(err, "ibv_dealloc_pd (pd_parent)");
}
#else
for (int i = 0; i < qps.size(); i++) {
err = ibv_destroy_qp(qps[i]);
CHECK_ZERO(err, "ibv_destroy_qp");
err = ibv_destroy_cq(cqs[i]);
CHECK_ZERO(err, "ibv_destroy_cqs");
}
#ifdef GDA_IONIC
err = ibv_dealloc_pd(pd_uxdma[0]);
CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[0])");
err = ibv_dealloc_pd(pd_uxdma[1]);
CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[1])");
#endif
err = ibv_dealloc_pd(pd_parent);
CHECK_ZERO(err, "ibv_dealloc_pd (pd_parent)");
#endif
err = ibv_dealloc_pd(pd_orig);
CHECK_ZERO(err, "ibv_dealloc_pd (pd_orig)");
@@ -644,6 +620,40 @@ void GDABackend::cleanup_ibv() {
CHECK_ZERO(err, "ibv_close_device");
}
void GDABackend::autodetect_dv_libs() {
int ret;
#ifdef GDA_IONIC
gda_vendor = GDAVendor::IONIC;
#endif
if (gda_vendor == GDAVendor::NONE) {
ret = bnxt_dv_dl_init();
if (ret == ROCSHMEM_SUCCESS) {
gda_vendor = GDAVendor::BNXT;
} else {
DPRINTF("Initializing rocSHMEM BNXT GDA support failed\n");
}
}
if (gda_vendor == GDAVendor::NONE) {
ret = mlx5_dv_dl_init();
if (ret == ROCSHMEM_SUCCESS) {
gda_vendor = GDAVendor::MLX5;
} else {
DPRINTF("Initializing rocSHMEM MLX5 GDA support failed\n");
}
}
if (gda_vendor == GDAVendor::NONE) {
printf("Initializing rocSHMEM with IONIC, BNXT, or MLX5 GDA support failed\n");
abort();
}
}
void GDABackend::exchange_qp_dest_info() {
for (int i = 0; i < qps.size(); i++) {
dest_info[i].lid = portinfo.lid;
@@ -712,9 +722,14 @@ void GDABackend::setup_gpu_qps() {
CHECK_NNULL(host_qps, "malloc (host_qps)");
for (int i = 0; i < qp_objs_count; i++) {
new (&host_qps[i]) QueuePair(pd_orig);
new (&host_qps[i]) QueuePair(pd_orig, gda_vendor);
CHECK_HIP(hipMemcpy(&gpu_qps[i], &host_qps[i], sizeof(QueuePair), hipMemcpyDefault));
initialize_gpu_qp(&gpu_qps[i], i);
if (gda_vendor == GDAVendor::BNXT) {
bnxt_initialize_gpu_qp(&gpu_qps[i], i);
} else {
initialize_gpu_qp(&gpu_qps[i], i);
}
}
}
@@ -766,9 +781,9 @@ void GDABackend::open_ib_device() {
CHECK_NNULL(pd_orig, "ib allocate pd");
dump_ibv_pd(pd_orig);
#ifndef GDA_BNXT
create_parent_domain();
#endif
if (gda_vendor == GDAVendor::IONIC || gda_vendor == GDAVendor::MLX5) {
create_parent_domain();
}
err = ibv_query_port(context, port, &portinfo);
CHECK_ZERO(err, "ibv_query_port");
@@ -801,11 +816,11 @@ void GDABackend::modify_qps_reset_to_init() {
| IBV_QP_ACCESS_FLAGS;
for (int i =0; i < qps.size() ; i++) {
#ifdef GDA_BNXT
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
#else
err = ibv_modify_qp(qps[i], &attr, attr_mask);
#endif
if (gda_vendor == GDAVendor::BNXT) {
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
} else {
err = ibv_modify_qp(qps[i], &attr, attr_mask);
}
CHECK_ZERO(err, "modify_qp (INIT)");
}
}
@@ -818,10 +833,15 @@ void GDABackend::modify_qps_init_to_rtr() {
memset(&attr, 0, sizeof(struct ibv_qp_attr));
attr.qp_state = IBV_QPS_RTR;
attr.path_mtu = portinfo.active_mtu;
attr.max_dest_rd_atomic = GDA_MAX_ATOMIC;
attr.min_rnr_timer = 12;
attr.ah_attr.port_num = port;
if (gda_vendor == GDAVendor::IONIC) {
attr.max_dest_rd_atomic = 15;
} else {
attr.max_dest_rd_atomic = 1;
}
if (portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
attr.ah_attr.grh.sgid_index = gid_index;
attr.ah_attr.is_global = 1;
@@ -847,11 +867,11 @@ void GDABackend::modify_qps_init_to_rtr() {
attr.ah_attr.dlid = dest_info[i].lid;
}
#ifdef GDA_BNXT
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
#else
err = ibv_modify_qp(qps[i], &attr, attr_mask);
#endif
if (gda_vendor == GDAVendor::BNXT) {
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
} else {
err = ibv_modify_qp(qps[i], &attr, attr_mask);
}
CHECK_ZERO(err, "modify_qp (RTR)");
}
}
@@ -863,11 +883,16 @@ void GDABackend::modify_qps_rtr_to_rts() {
memset(&attr, 0, sizeof(struct ibv_qp_attr));
attr.qp_state = IBV_QPS_RTS;
attr.max_rd_atomic = GDA_MAX_ATOMIC;
attr.timeout = 14;
attr.retry_cnt = 7;
attr.rnr_retry = 7;
if (gda_vendor == GDAVendor::IONIC) {
attr.max_dest_rd_atomic = 15;
} else {
attr.max_dest_rd_atomic = 1;
}
attr_mask = IBV_QP_STATE
| IBV_QP_SQ_PSN
| IBV_QP_MAX_QP_RD_ATOMIC
@@ -878,11 +903,11 @@ void GDABackend::modify_qps_rtr_to_rts() {
for (int i = 0; i < qps.size(); i++) {
attr.sq_psn = dest_info[i].psn;
#ifdef GDA_BNXT
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
#else
err = ibv_modify_qp(qps[i], &attr, attr_mask);
#endif
if (gda_vendor == GDAVendor::BNXT) {
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
} else {
err = ibv_modify_qp(qps[i], &attr, attr_mask);
}
CHECK_ZERO(err, "modify_qp (RTS)");
}
}
@@ -891,11 +916,11 @@ void GDABackend::create_queues() {
int ncqes;
int resize_length;
#ifdef GDA_IONIC
ncqes = sq_size << 1;
#else
ncqes = sq_size;
#endif
if (gda_vendor == GDAVendor::IONIC) {
ncqes = sq_size << 1;
} else {
ncqes = sq_size;
}
resize_length = (maximum_num_contexts_ + 1) * num_pes;
@@ -903,24 +928,28 @@ void GDABackend::create_queues() {
cqs.resize(resize_length);
qps.resize(resize_length);
#ifdef GDA_BNXT
bnxt_cqs.resize(resize_length);
bnxt_qps.resize(resize_length);
#endif
create_cqs(ncqes);
create_qps(sq_size);
if (gda_vendor == GDAVendor::BNXT) {
bnxt_create_cqs(ncqes);
bnxt_create_qps(sq_size);
} else {
create_cqs(ncqes);
create_qps(sq_size);
}
}
#ifndef GDA_BNXT
void* GDABackend::pd_alloc(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) {
void* GDABackend::pd_alloc_device_uncached(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) {
void* dev_ptr{nullptr};
//TODO make this configurable, presumably we want it on device for all types?
#ifdef GDA_IONIC
CHECK_HIP(hipExtMallocWithFlags(reinterpret_cast<void**>(&dev_ptr), size, hipDeviceMallocUncached));
#else
memset(dev_ptr, 0, size);
return dev_ptr;
}
void* GDABackend::pd_alloc_host(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) {
void* dev_ptr{nullptr};
CHECK_HIP(hipHostMalloc(reinterpret_cast<void**>(&dev_ptr), size, hipHostMallocDefault));
#endif
memset(dev_ptr, 0, size);
return dev_ptr;
}
@@ -933,12 +962,17 @@ void GDABackend::create_parent_domain() {
struct ibv_parent_domain_init_attr pattr;
memset(&pattr, 0, sizeof(struct ibv_parent_domain_init_attr));
pattr.pd = pd_orig,
pattr.pd = pd_orig;
pattr.td = nullptr,
pattr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_ALLOCATORS,
pattr.alloc = GDABackend::pd_alloc,
pattr.free = GDABackend::pd_release,
pattr.pd_context = nullptr,
pattr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_ALLOCATORS;
pattr.free = GDABackend::pd_release;
pattr.pd_context = nullptr;
if (gda_vendor == GDAVendor::IONIC) {
pattr.alloc = GDABackend::pd_alloc_device_uncached;
} else {
pattr.alloc = GDABackend::pd_alloc_host;
}
pd_parent = ibv_alloc_parent_domain(context, &pattr);
CHECK_NNULL(pd_parent, "ibv_alloc_parent_domain");
@@ -956,7 +990,7 @@ void GDABackend::create_parent_domain() {
ionic_dv_pd_set_rqcmb(pd_uxdma[uxdma_i], false, false, false);
ionic_dv_pd_set_udma_mask(pd_uxdma[uxdma_i], 1u << uxdma_i);
}
#endif
#endif /* GDA_IONIC */
}
void GDABackend::create_cqs(int cqe) {
@@ -973,9 +1007,9 @@ void GDABackend::create_cqs(int cqe) {
cq_attr.parent_domain = pd_parent;
for (int i = 0; i < qps.size(); i++) {
#ifdef GDA_IONIC
cq_attr.parent_domain = pd_uxdma[((i + 1) / 2) & 1];
#endif
if (gda_vendor == GDAVendor::IONIC) {
cq_attr.parent_domain = pd_uxdma[((i + 1) / 2) & 1];
}
cq_ex = ibv_create_cq_ex(context, &cq_attr);
CHECK_NNULL(cq_ex, "ibv_create_cq_ex");
@@ -1028,76 +1062,77 @@ void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
gpu_qp->lkey = heap_mr->lkey;
gpu_qp->rkey = heap_rkey[conn_num % num_pes];
gpu_qp->inline_threshold = 32;
#else // !GDA_IONIC
mlx5dv_cq cq_out;
mlx5dv_obj mlx_obj;
mlx_obj.cq.in = cqs[conn_num];
mlx_obj.cq.out = &cq_out;
mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_CQ);
dump_mlx5dv_cq(&cq_out, conn_num);
#endif /* GDA_IONIC */
if (gda_vendor == GDAVendor::MLX5) {
mlx5dv_cq cq_out;
mlx5dv_obj mlx_obj;
mlx_obj.cq.in = cqs[conn_num];
mlx_obj.cq.out = &cq_out;
mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_CQ);
dump_mlx5dv_cq(&cq_out, conn_num);
/*
* struct mlx5dv_cq {
* void *buf;
* __be32 *dbrec;
* uint32_t cqe_cnt;
* uint32_t cqe_size;
* void *cq_uar;
* uint32_t cqn;
* uint64_t comp_mask;
* };
*/
/*
* struct mlx5dv_cq {
* void *buf;
* __be32 *dbrec;
* uint32_t cqe_cnt;
* uint32_t cqe_size;
* void *cq_uar;
* uint32_t cqn;
* uint64_t comp_mask;
* };
*/
gpu_qp->cq_buf = reinterpret_cast<mlx5_cqe64*>(cq_out.buf);
gpu_qp->cq_cnt = cq_out.cqe_cnt;
gpu_qp->cq_log_cnt = log2(cq_out.cqe_cnt);
gpu_qp->cq_dbrec = cq_out.dbrec;
gpu_qp->cq_buf = reinterpret_cast<mlx5_cqe64*>(cq_out.buf);
gpu_qp->cq_cnt = cq_out.cqe_cnt;
gpu_qp->cq_log_cnt = log2(cq_out.cqe_cnt);
gpu_qp->cq_dbrec = cq_out.dbrec;
mlx5dv_qp qp_out;
mlx_obj.qp.in = qps[conn_num];
mlx_obj.qp.out = &qp_out;
mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_QP);
dump_mlx5dv_qp(&qp_out, conn_num);
mlx5dv_qp qp_out;
mlx_obj.qp.in = qps[conn_num];
mlx_obj.qp.out = &qp_out;
mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_QP);
dump_mlx5dv_qp(&qp_out, conn_num);
/*
* struct mlx5dv_qp {
* __be32 *dbrec;
* struct {
* void *buf;
* uint32_t wqe_cnt;
* uint32_t stride;
* } sq;
* struct {
* void *buf;
* uint32_t wqe_cnt;
* uint32_t stride;
* } rq;
* struct {
* void *reg;
* uint32_t size;
* } bf;
* uint64_t comp_mask;
* off_t uar_mmap_offset;
* uint32_t tirn;
* uint32_t tisn;
* uint32_t rqn;
* uint32_t sqn;
* uint64_t tir_icm_addr;
* };
*/
/*
* struct mlx5dv_qp {
* __be32 *dbrec;
* struct {
* void *buf;
* uint32_t wqe_cnt;
* uint32_t stride;
* } sq;
* struct {
* void *buf;
* uint32_t wqe_cnt;
* uint32_t stride;
* } rq;
* struct {
* void *reg;
* uint32_t size;
* } bf;
* uint64_t comp_mask;
* off_t uar_mmap_offset;
* uint32_t tirn;
* uint32_t tisn;
* uint32_t rqn;
* uint32_t sqn;
* uint64_t tir_icm_addr;
* };
*/
gpu_qp->dbrec = &qp_out.dbrec[1]; // points to two pointers: 0 -> MLX5_REC_DBR, 1 -> MLX5_SND_DBR
gpu_qp->sq_buf = reinterpret_cast<uint64_t*>(qp_out.sq.buf);
gpu_qp->sq_wqe_cnt = qp_out.sq.wqe_cnt;
gpu_qp->rkey = htobe32(heap_rkey[conn_num % num_pes]);
gpu_qp->lkey = htobe32(heap_mr->lkey);
gpu_qp->qp_num = qps[conn_num]->qp_num;
gpu_qp->inline_threshold = inline_threshold;
// The 2 in qp_out.bf.size * 2 below facilitates the switching between blue flame registers
void* gpu_ptr{nullptr};
rocm_memory_lock_to_fine_grain(qp_out.bf.reg, qp_out.bf.size * 2, &gpu_ptr, hip_dev_id);
gpu_qp->db.ptr = reinterpret_cast<uint64_t*>(gpu_ptr);
#endif // !GDA_IONIC
gpu_qp->dbrec = &qp_out.dbrec[1]; // points to two pointers: 0 -> MLX5_REC_DBR, 1 -> MLX5_SND_DBR
gpu_qp->sq_buf = reinterpret_cast<uint64_t*>(qp_out.sq.buf);
gpu_qp->sq_wqe_cnt = qp_out.sq.wqe_cnt;
gpu_qp->rkey = htobe32(heap_rkey[conn_num % num_pes]);
gpu_qp->lkey = htobe32(heap_mr->lkey);
gpu_qp->qp_num = qps[conn_num]->qp_num;
gpu_qp->inline_threshold = inline_threshold;
// The 2 in qp_out.bf.size * 2 below facilitates the switching between blue flame registers
void* gpu_ptr{nullptr};
rocm_memory_lock_to_fine_grain(qp_out.bf.reg, qp_out.bf.size * 2, &gpu_ptr, hip_dev_id);
gpu_qp->db.ptr = reinterpret_cast<uint64_t*>(gpu_ptr);
}
}
void GDABackend::create_qps(int sq_length) {
@@ -1107,18 +1142,19 @@ void GDABackend::create_qps(int sq_length) {
attr.cap.max_send_wr = sq_length;
attr.cap.max_send_sge = 1;
attr.cap.max_inline_data = inline_threshold;
#ifdef GDA_IONIC
attr.cap.max_recv_sge = 1; // TODO allow zero sges in the driver
#endif
attr.sq_sig_all = 0;
attr.qp_type = IBV_QPT_RC;
attr.comp_mask = IBV_QP_INIT_ATTR_PD;
attr.pd = pd_parent;
if (gda_vendor == GDAVendor::IONIC) {
attr.cap.max_recv_sge = 1; // TODO allow zero sges in the driver
}
for (int i = 0; i < qps.size(); i++) {
#ifdef GDA_IONIC
attr.pd = pd_uxdma[((i + 1) / 2) & 1];
#endif
if (gda_vendor == GDAVendor::IONIC) {
attr.pd = pd_uxdma[((i + 1) / 2) & 1];
}
attr.send_cq = cqs[i];
attr.recv_cq = cqs[i];
@@ -1126,7 +1162,6 @@ void GDABackend::create_qps(int sq_length) {
CHECK_NNULL(qps[i], "ibv_create_qp_ex");
}
}
#endif
void GDABackend::select_gid_index() {
struct ibv_gid_entry *gid_entries;
+29 -21
Просмотреть файл
@@ -36,9 +36,9 @@
#include "queue_pair.hpp"
#include "bootstrap/bootstrap.hpp"
#include "debug_gda.hpp"
#ifdef GDA_BNXT
#include <infiniband/bnxt_re_dv.h>
#include "gda/ionic/provider_gda_ionic.hpp"
#include "gda/bnxt/provider_gda_bnxt.hpp"
#include "gda/mlx5/provider_gda_mlx5.hpp"
struct bnxtdv_funcs_t {
int (*init_obj)(struct bnxt_re_dv_obj *obj, uint64_t obj_type);
@@ -61,15 +61,10 @@ struct bnxtdv_funcs_t {
int (*get_default_db_region)(struct ibv_context *ibvctx,
struct bnxt_re_dv_db_region_attr *out);
};
#endif /* GDA_BNXT */
#ifdef GDA_MLX5
#include <infiniband/mlx5dv.h>
struct mlx5dv_funcs_t {
int (*init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
};
#endif /* GDA_MLX5 */
/* Helper Macros for handling dynamic libraries */
#define PPCAT_NX(prefix, func_name) prefix##func_name
@@ -96,6 +91,13 @@ class GDAHostContext;
class QueuePair;
class HostInterface;
enum GDAVendor {
NONE,
IONIC,
BNXT,
MLX5
};
class GDABackend : public Backend {
private:
typedef struct dest_info {
@@ -108,6 +110,7 @@ class GDABackend : public Backend {
char *requested_dev = nullptr;
struct ibv_context *context = nullptr;;
struct ibv_pd *pd_orig = nullptr;
enum GDAVendor gda_vendor = GDAVendor::NONE;
struct ibv_port_attr portinfo;
union ibv_gid gid;
@@ -125,21 +128,23 @@ class GDABackend : public Backend {
std::vector<ibv_cq*> cqs;
std::vector<dest_info_t> dest_info;
#ifdef GDA_BNXT
/* GDA_BNXT START */
std::vector<struct bnxt_host_qp> bnxt_qps;
std::vector<struct bnxt_host_cq> bnxt_cqs;
struct bnxt_re_dv_db_region_attr db_region_attr;
#else
struct ibv_pd *pd_parent = nullptr;
#endif
/* GDA_BNXT END */
#ifdef GDA_IONIC
/* GDA_IONIC & GDA_MLX5 START */
struct ibv_pd *pd_parent = nullptr;
/* GDA_IONIC & GDA_MLX5 END */
/* GDA_IONIC START */
struct ibv_pd *pd_uxdma[2];
void *gpu_db_page = nullptr;
uint64_t *gpu_db_cq = nullptr;
uint64_t *gpu_db_sq = nullptr;
#endif
/* GDA_IONIC END */
/**
* @brief Common code invoked from the different constructors
@@ -327,6 +332,7 @@ class GDABackend : public Backend {
void cleanup_heap_memory_rkey();
void initialize_gpu_qp(QueuePair* qp, int conn_num);
void bnxt_initialize_gpu_qp(QueuePair* qp, int conn_num);
/**
* @brief Setup InfiniBand Resources
@@ -338,6 +344,11 @@ class GDABackend : public Backend {
*/
void cleanup_ibv();
/**
* @brief Detect the available direct verbs libraries
*/
void autodetect_dv_libs();
/**
* @brief Open InfiniBand Device and create common structures
*/
@@ -357,11 +368,13 @@ class GDABackend : public Backend {
* @brief Create all CQs with a of length ncqes
*/
void create_cqs(int ncqes);
void bnxt_create_cqs(int ncqes);
/**
* @brief Create all QPs with a SQ of length sq_length
*/
void create_qps(int sq_length);
void bnxt_create_qps(int sq_length);
/**
* @brief Exchange QP information for connection
@@ -388,13 +401,12 @@ class GDABackend : public Backend {
*/
int ibv_mtu_to_int(enum ibv_mtu mtu);
#ifndef GDA_BNXT
static void* pd_alloc(ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type);
static void* pd_alloc_host(ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type);
static void* pd_alloc_device_uncached(ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type);
static void pd_release(ibv_pd* pd, void* pd_context, void* ptr, uint64_t resource_type);
void create_parent_domain();
#endif
void setup_gpu_qps();
void cleanup_gpu_qps();
@@ -501,7 +513,6 @@ class GDABackend : public Backend {
*/
void rte_barrier();
#ifdef GDA_BNXT
/**
* @brief structures holding the function pointers to the direct verbs functionality
* of each network driver.
@@ -517,9 +528,7 @@ class GDABackend : public Backend {
* @brief initialize function table for BCOM direct verbs support
*/
int bnxt_dv_dl_init();
#endif
#ifdef GDA_MLX5
/**
* @brief structures holding the function pointers to the direct verbs functionality
* of each network driver.
@@ -535,7 +544,6 @@ class GDABackend : public Backend {
* @brief initialize function table for MLNX direct verbs support
*/
int mlx5_dv_dl_init();
#endif
};
} // namespace rocshmem
+7
Просмотреть файл
@@ -25,5 +25,12 @@ target_sources(
${PROJECT_NAME}
PRIVATE
backend_gda_bnxt.cpp
)
if(GDA_BNXT)
target_sources(
${PROJECT_NAME}
PRIVATE
queue_pair_bnxt.cpp
)
endif()
+3 -4
Просмотреть файл
@@ -28,7 +28,7 @@
namespace rocshmem {
void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
struct bnxt_re_dv_obj dv_obj;
struct bnxt_re_dv_cq dv_cq;
struct bnxt_re_dv_qp dv_qp;
@@ -90,7 +90,7 @@ void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
gpu_qp->inline_threshold = inline_threshold;
}
void GDABackend::create_cqs(int cqe) {
void GDABackend::bnxt_create_cqs(int cqe) {
struct bnxt_re_dv_cq_attr cq_attr;
struct bnxt_re_dv_cq_init_attr cq_init_attr;
struct bnxt_re_dv_umem_reg_attr umem_attr;
@@ -126,7 +126,7 @@ void GDABackend::create_cqs(int cqe) {
}
}
void GDABackend::create_qps(int sq_length) {
void GDABackend::bnxt_create_qps(int sq_length) {
struct ibv_qp_init_attr ib_qp_attr;
struct bnxt_re_dv_umem_reg_attr umem_attr;
void *sq_ptr;
@@ -246,4 +246,3 @@ int GDABackend::bnxt_dv_dl_init() {
}
} // namespace rocshmem
+219
Просмотреть файл
@@ -0,0 +1,219 @@
/*
* Copyright (c) 2025, Broadcom. All rights reserved. The term
* Broadcom refers to Broadcom Limited and/or its subsidiaries.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Description: Direct verb support user interface header
*/
#ifndef __BNXT_RE_DV_H__
#define __BNXT_RE_DV_H__
#include <stdint.h>
#include <infiniband/verbs.h>
#ifdef __cplusplus
extern "C" {
#endif
struct bnxt_re_dv_qp {
uint64_t wqe_cnt;
uint64_t comp_mask;
};
struct bnxt_re_dv_cq {
uint32_t cqn;
uint32_t cqe_size;
uint64_t comp_mask;
};
struct bnxt_re_dv_srq {
uint32_t srqn;
uint64_t comp_mask;
};
struct bnxt_re_dv_ah {
uint32_t avid;
uint64_t comp_mask;
};
struct bnxt_re_dv_pd {
uint32_t pdn;
uint64_t comp_mask;
};
struct bnxt_re_dv_obj {
struct {
struct ibv_qp *in;
struct bnxt_re_dv_qp *out;
} qp;
struct {
struct ibv_cq *in;
struct bnxt_re_dv_cq *out;
} cq;
struct {
struct ibv_srq *in;
struct bnxt_re_dv_srq *out;
} srq;
struct {
struct ibv_ah *in;
struct bnxt_re_dv_ah *out;
} ah;
struct {
struct ibv_pd *in;
struct bnxt_re_dv_pd *out;
} pd;
};
int bnxt_re_dv_init_obj(struct bnxt_re_dv_obj *obj, uint64_t obj_type);
enum bnxt_re_dv_obj_type {
BNXT_RE_DV_OBJ_QP = 1 << 0,
BNXT_RE_DV_OBJ_CQ = 1 << 1,
BNXT_RE_DV_OBJ_SRQ = 1 << 2,
BNXT_RE_DV_OBJ_AH = 1 << 3,
BNXT_RE_DV_OBJ_PD = 1 << 4,
};
int bnxt_re_dv_modify_qp_udp_sport(struct ibv_qp *qp, uint16_t udp_sport);
struct bnxt_re_dv_db_region_attr {
uint32_t handle;
uint32_t dpi;
uint64_t umdbr;
__u64 *dbr;
};
#ifdef EXPERIMENTAL_APIS
struct bnxt_re_dv_db_region_attr *
bnxt_re_dv_alloc_db_region(struct ibv_context *ctx);
int bnxt_re_dv_free_db_region(struct ibv_context *ctx,
struct bnxt_re_dv_db_region_attr *attr);
#endif
int bnxt_re_dv_get_default_db_region(struct ibv_context *ibvctx,
struct bnxt_re_dv_db_region_attr *out);
enum bnxt_re_dv_umem_in_flags {
BNXT_RE_DV_UMEM_FLAGS_DMABUF = 1 << 0,
};
struct bnxt_re_dv_umem_reg_attr {
void *addr;
size_t size;
uint32_t access_flags;
uint64_t pgsz_bitmap;
uint64_t comp_mask;
int dmabuf_fd;
};
struct bnxt_re_dv_cq_init_attr {
uint64_t cq_handle;
void *umem_handle; /* umem_handle from umem_reg */
uint64_t cq_umem_offset; /* offset into umem */
uint32_t ncqe;
};
struct bnxt_re_dv_cq_attr {
uint32_t ncqe; /* no. of entries */
uint32_t cqe_size; /* size of entries */
};
struct bnxt_re_dv_qp_init_attr {
/* Standard ibv params */
enum ibv_qp_type qp_type;
uint32_t max_send_wr;
uint32_t max_recv_wr;
uint32_t max_send_sge;
uint32_t max_recv_sge;
uint32_t max_inline_data;
struct ibv_cq *send_cq;
struct ibv_cq *recv_cq;
struct ibv_srq *srq;
/* DV params */
uint64_t qp_handle; /* to match with cqe */
void *dbr_handle; /* dbr_handle from alloc_dbr */
void *sq_umem_handle; /* umem_handle from umem_reg */
uint64_t sq_umem_offset; /* offset into umem */
uint32_t sq_len; /* sq length including MSN area */
uint32_t sq_slots; /* sq length in slots */
void *rq_umem_handle; /* umem_handle from umem_reg */
uint64_t rq_umem_offset; /* offset into umem */
uint32_t sq_wqe_sz; /* sq wqe size */
uint32_t sq_psn_sz; /* sq psn size */
uint32_t sq_npsn; /* sq num psn entries */
uint32_t rq_len; /* rq length */
uint32_t rq_slots; /* rq length in slots */
uint32_t rq_wqe_sz; /* rq wqe size */
uint64_t comp_mask; /* compatibility mask for future updates */
};
struct bnxt_re_dv_qp_mem_info {
uint64_t qp_handle; /* to match with cqe */
uint64_t sq_va; /* Peer-mem sq-va (not dma mapped) */
uint32_t sq_len; /* sq length including MSN area */
uint32_t sq_slots; /* sq length in slots */
uint32_t sq_wqe_sz; /* sq wqe size */
uint32_t sq_psn_sz; /* sq psn size */
uint32_t sq_npsn; /* sq num psn entries */
uint64_t rq_va; /* Peer-mem rq-va (not dma mapped) */
uint32_t rq_len; /* rq length */
uint32_t rq_slots; /* rq length in slots */
uint32_t rq_wqe_sz; /* rq wqe size */
uint64_t comp_mask; /* compatibility bit mask */
};
void *bnxt_re_dv_umem_reg(struct ibv_context *ibvctx,
struct bnxt_re_dv_umem_reg_attr *in);
int bnxt_re_dv_umem_dereg(void *umem_handle);
struct ibv_cq *bnxt_re_dv_create_cq(struct ibv_context *ibvctx,
struct bnxt_re_dv_cq_init_attr *cq_attr);
int bnxt_re_dv_destroy_cq(struct ibv_cq *ibv_cq);
struct ibv_qp *bnxt_re_dv_create_qp(struct ibv_pd *pd,
struct bnxt_re_dv_qp_init_attr *qp_attr);
int bnxt_re_dv_destroy_qp(struct ibv_qp *ibvqp);
int bnxt_re_dv_modify_qp(struct ibv_qp *ibv_qp, struct ibv_qp_attr *attr,
int attr_mask, uint32_t type, uint32_t value);
int bnxt_re_dv_query_qp(void *qp_handle, struct ib_uverbs_qp_attr *attr);
int bnxt_re_dv_qp_mem_alloc(struct ibv_pd *ibvpd,
struct ibv_qp_init_attr *attr,
struct bnxt_re_dv_qp_mem_info *dv_qp_mem);
int bnxt_re_dv_qp_get_mem_info(struct ibv_pd *ibvpd,
struct ibv_qp_init_attr *attr,
struct bnxt_re_dv_qp_mem_info *qp_mem);
int bnxt_re_dv_get_cq_attr(struct ibv_context *ibvctx, uint32_t ncqe,
struct bnxt_re_dv_cq_attr *cq_attr);
void *bnxt_re_dv_cq_mem_alloc(struct ibv_context *ibvctx, int num_cqe,
struct bnxt_re_dv_cq_attr *cq_attr);
#ifdef __cplusplus
}
#endif
#endif /* __BNXT_RE_DV_H__ */
+416
Просмотреть файл
@@ -0,0 +1,416 @@
/*
* Copyright (c) 2025, Broadcom. All rights reserved. The term
* Broadcom refers to Broadcom Limited and/or its subsidiaries.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Description: Fast path definitions for bnxt_re
*/
#ifndef __BNXT_RE_HSI_H__
#define __BNXT_RE_HSI_H__
#ifdef __cplusplus
extern "C" {
#endif
#define true 1
#define false 0
#define BNXT_RE_SLOT_SIZE_BB 16
#define BNXT_RE_STATIC_WQE_SIZE_SLOTS 4
#define BNXT_RE_STATIC_WQE_BB (BNXT_RE_STATIC_WQE_SIZE_SLOTS * BNXT_RE_SLOT_SIZE_BB)
#define BNXT_RE_STATIC_WQE_SHIFT 6
#define BNXT_RE_STATIC_RQE_SIZE_SLOTS 4
#define BNXT_RE_STATIC_RQE_BB (BNXT_RE_STATIC_RQE_SIZE_SLOTS * BNXT_RE_SLOT_SIZE_BB)
#define BNXT_RE_STATIC_RQE_SHIFT 6
#define BNXT_RE_STATIC_CQE_SIZE_SLOTS 4
#define BNXT_RE_STATIC_CQE_BB (BNXT_RE_STATIC_CQE_SIZE_SLOTS * BNXT_RE_SLOT_SIZE_BB)
#define BNXT_RE_STATIC_CQE_SHIFT 6
#define BNXT_RE_QUEUE_START_PHASE 0x01
enum bnxt_re_wr_opcode {
BNXT_RE_WR_OPCD_SEND = 0x00,
BNXT_RE_WR_OPCD_SEND_IMM = 0x01,
BNXT_RE_WR_OPCD_SEND_INVAL = 0x02,
BNXT_RE_WR_OPCD_RDMA_WRITE = 0x04,
BNXT_RE_WR_OPCD_RDMA_WRITE_IMM = 0x05,
BNXT_RE_WR_OPCD_RDMA_READ = 0x06,
BNXT_RE_WR_OPCD_ATOMIC_CS = 0x08,
BNXT_RE_WR_OPCD_ATOMIC_FA = 0x0B,
BNXT_RE_WR_OPCD_LOC_INVAL = 0x0C,
BNXT_RE_WR_OPCD_BIND = 0x0E,
BNXT_RE_WR_OPCD_FR_PPMR = 0x0F,
BNXT_RE_WR_OPCD_RECV = 0x80,
BNXT_RE_WR_OPCD_INVAL = 0xFF
};
enum bnxt_re_wr_flags {
BNXT_RE_WR_FLAGS_DBG_TRACE = 0x40,
BNXT_RE_WR_FLAGS_TS_EN = 0x20,
BNXT_RE_WR_FLAGS_INLINE = 0x10,
BNXT_RE_WR_FLAGS_SE = 0x08,
BNXT_RE_WR_FLAGS_UC_FENCE = 0x04,
BNXT_RE_WR_FLAGS_RD_FENCE = 0x02,
BNXT_RE_WR_FLAGS_SIGNALED = 0x01
};
#define BNXT_RE_MEMW_TYPE_2 0x02
#define BNXT_RE_MEMW_TYPE_1 0x00
enum bnxt_re_wr_bind_acc {
BNXT_RE_WR_BIND_ACC_LWR = 0x01,
BNXT_RE_WR_BIND_ACC_RRD = 0x02,
BNXT_RE_WR_BIND_ACC_RWR = 0x04,
BNXT_RE_WR_BIND_ACC_RAT = 0x08,
BNXT_RE_WR_BIND_ACC_MWB = 0x10,
BNXT_RE_WR_BIND_ACC_ZBVA = 0x01,
BNXT_RE_WR_BIND_ACC_SHIFT = 0x10
};
enum bnxt_re_wc_type {
BNXT_RE_WC_TYPE_SEND = 0x00,
BNXT_RE_WC_TYPE_RECV_RC = 0x01,
BNXT_RE_WC_TYPE_RECV_UD = 0x02,
BNXT_RE_WC_TYPE_RECV_RAW = 0x03,
BNXT_RE_WC_TYPE_NOOP = 0x0D,
BNXT_RE_WC_TYPE_TERM = 0x0E,
BNXT_RE_WC_TYPE_COFF = 0x0F
};
#define BNXT_RE_WC_OPCD_RECV 0x80
enum bnxt_re_req_wc_status {
BNXT_RE_REQ_ST_OK = 0x00,
BNXT_RE_REQ_ST_BAD_RESP = 0x01,
BNXT_RE_REQ_ST_LOC_LEN = 0x02,
BNXT_RE_REQ_ST_LOC_QP_OP = 0x03,
BNXT_RE_REQ_ST_PROT = 0x04,
BNXT_RE_REQ_ST_MEM_OP = 0x05,
BNXT_RE_REQ_ST_REM_INVAL = 0x06,
BNXT_RE_REQ_ST_REM_ACC = 0x07,
BNXT_RE_REQ_ST_REM_OP = 0x08,
BNXT_RE_REQ_ST_RNR_NAK_XCED = 0x09,
BNXT_RE_REQ_ST_TRNSP_XCED = 0x0A,
BNXT_RE_REQ_ST_WR_FLUSH = 0x0B
};
enum bnxt_re_rsp_wc_status {
BNXT_RE_RSP_ST_OK = 0x00,
BNXT_RE_RSP_ST_LOC_ACC = 0x01,
BNXT_RE_RSP_ST_LOC_LEN = 0x02,
BNXT_RE_RSP_ST_LOC_PROT = 0x03,
BNXT_RE_RSP_ST_LOC_QP_OP = 0x04,
BNXT_RE_RSP_ST_MEM_OP = 0x05,
BNXT_RE_RSP_ST_REM_INVAL = 0x06,
BNXT_RE_RSP_ST_WR_FLUSH = 0x07,
BNXT_RE_RSP_ST_HW_FLUSH = 0x08
};
enum bnxt_re_hdr_offset {
BNXT_RE_HDR_WT_MASK = 0xFF,
BNXT_RE_HDR_FLAGS_MASK = 0xFF,
BNXT_RE_HDR_FLAGS_SHIFT = 0x08,
BNXT_RE_HDR_WS_MASK = 0xFF,
BNXT_RE_HDR_WS_SHIFT = 0x10,
BNXT_RE_HDR_ZB_SHIFT = 0x16,
BNXT_RE_HDR_MW_SHIFT = 0x17,
BNXT_RE_HDR_ACC_SHIFT = 0x18,
BNXT_RE_HDR_IL_MASK = 0x0F,
BNXT_RE_HDR_IL_SHIFT = 0x18,
};
enum bnxt_re_db_que_type {
BNXT_RE_QUE_TYPE_SQ = 0x00,
BNXT_RE_QUE_TYPE_RQ = 0x01,
BNXT_RE_QUE_TYPE_SRQ = 0x02,
BNXT_RE_QUE_TYPE_SRQ_ARM = 0x03,
BNXT_RE_QUE_TYPE_CQ = 0x04,
BNXT_RE_QUE_TYPE_CQ_ARMSE = 0x05,
BNXT_RE_QUE_TYPE_CQ_ARMALL = 0x06,
BNXT_RE_QUE_TYPE_CQ_ARMENA = 0x07,
BNXT_RE_QUE_TYPE_SRQ_ARMENA = 0x08,
BNXT_RE_QUE_TYPE_CQ_CUT_ACK = 0x09,
BNXT_RE_PUSH_TYPE_START = 0x0C,
BNXT_RE_PUSH_TYPE_END = 0x0D,
BNXT_RE_QUE_TYPE_NULL = 0x0F
};
enum bnxt_re_db_mask {
BNXT_RE_DB_INDX_MASK = 0xFFFFFFUL,
BNXT_RE_DB_PILO_MASK = 0x0FFUL,
BNXT_RE_DB_PILO_SHIFT = 0x18,
BNXT_RE_DB_QID_MASK = 0xFFFFFUL,
BNXT_RE_DB_PIHI_MASK = 0xF00UL,
BNXT_RE_DB_PIHI_SHIFT = 0x0C, /* Because mask is 0xF00 */
BNXT_RE_DB_TYP_MASK = 0x0FUL,
BNXT_RE_DB_TYP_SHIFT = 0x1C,
BNXT_RE_DB_VALID_SHIFT = 0x1A,
BNXT_RE_DB_EPOCH_SHIFT = 0x18,
BNXT_RE_DB_TOGGLE_SHIFT = 0x19,
};
enum bnxt_re_psns_mask {
BNXT_RE_PSNS_SPSN_MASK = 0xFFFFFF,
BNXT_RE_PSNS_OPCD_MASK = 0xFF,
BNXT_RE_PSNS_OPCD_SHIFT = 0x18,
BNXT_RE_PSNS_NPSN_MASK = 0xFFFFFF,
BNXT_RE_PSNS_FLAGS_MASK = 0xFF,
BNXT_RE_PSNS_FLAGS_SHIFT = 0x18
};
enum bnxt_re_msns_mask {
BNXT_RE_SQ_MSN_SEARCH_START_PSN_MASK = 0xFFFFFFUL,
BNXT_RE_SQ_MSN_SEARCH_START_PSN_SHIFT = 0,
BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_MASK = 0xFFFFFF000000ULL,
BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_SHIFT = 0x18,
BNXT_RE_SQ_MSN_SEARCH_START_IDX_MASK = 0xFFFF000000000000ULL,
BNXT_RE_SQ_MSN_SEARCH_START_IDX_SHIFT = 0x30
};
enum bnxt_re_bcqe_mask {
BNXT_RE_BCQE_PH_MASK = 0x01,
BNXT_RE_BCQE_TYPE_MASK = 0x0F,
BNXT_RE_BCQE_TYPE_SHIFT = 0x01,
BNXT_RE_BCQE_RESIZE_TOG_MASK = 0x03,
BNXT_RE_BCQE_RESIZE_TOG_SHIFT = 0x05,
BNXT_RE_BCQE_STATUS_MASK = 0xFF,
BNXT_RE_BCQE_STATUS_SHIFT = 0x08,
BNXT_RE_BCQE_FLAGS_MASK = 0xFFFFU,
BNXT_RE_BCQE_FLAGS_SHIFT = 0x10,
/* wr_id for V1/V2 */
BNXT_RE_BCQE_RWRID_MASK = 0xFFFFFU,
/* higher 16b of source QP for V1/V2 */
BNXT_RE_BCQE_SRCQP_MASK = 0xFF,
BNXT_RE_BCQE_SRCQP_SHIFT = 0x18
};
enum bnxt_re_rc_flags_mask {
BNXT_RE_RC_FLAGS_SRQ_RQ_MASK = 0x01,
BNXT_RE_RC_FLAGS_IMM_MASK = 0x02,
BNXT_RE_RC_FLAGS_IMM_SHIFT = 0x01,
BNXT_RE_RC_FLAGS_INV_MASK = 0x04,
BNXT_RE_RC_FLAGS_INV_SHIFT = 0x02,
BNXT_RE_RC_FLAGS_RDMA_MASK = 0x08,
BNXT_RE_RC_FLAGS_RDMA_SHIFT = 0x03
};
enum bnxt_re_ud_flags_mask {
BNXT_RE_UD_FLAGS_SRQ_RQ_MASK = 0x01,
BNXT_RE_UD_FLAGS_SRQ_RQ_SFT = 0x00,
BNXT_RE_UD_FLAGS_IMM_MASK = 0x02,
BNXT_RE_UD_FLAGS_IMM_SFT = 0x01,
BNXT_RE_UD_FLAGS_IP_VER_MASK = 0x30,
BNXT_RE_UD_FLAGS_IP_VER_SFT = 0x4,
/* the following has been removed in V3 */
BNXT_RE_UD_FLAGS_META_MASK = 0x3C0,
BNXT_RE_UD_FLAGS_META_SFT = 0x6,
BNXT_RE_UD_FLAGS_EXT_META_MASK = 0xC00,
BNXT_RE_UD_FLAGS_EXT_META_SFT = 0x10,
};
enum bnxt_re_ud_cqe_mask {
BNXT_RE_UD_CQE_MAC_MASK = 0xFFFFFFFFFFFFULL,
BNXT_RE_UD_CQE_SRCQPLO_MASK = 0xFFFF,
BNXT_RE_UD_CQE_SRCQPLO_SHIFT = 0x30,
BNXT_RE_UD_CQE_LEN_MASK = 0x3FFFU
};
enum bnxt_re_que_flags_mask {
BNXT_RE_FLAG_EPOCH_TAIL_SHIFT = 0x0UL,
BNXT_RE_FLAG_EPOCH_HEAD_SHIFT = 0x1UL,
BNXT_RE_FLAG_EPOCH_TAIL_MASK = 0x1UL,
BNXT_RE_FLAG_EPOCH_HEAD_MASK = 0x2UL,
};
enum bnxt_re_db_epoch_flag_shift {
BNXT_RE_DB_EPOCH_TAIL_SHIFT = BNXT_RE_DB_EPOCH_SHIFT,
BNXT_RE_DB_EPOCH_HEAD_SHIFT = (BNXT_RE_DB_EPOCH_SHIFT - 1)
};
enum bnxt_re_ppp_st_en_mask {
BNXT_RE_PPP_ENABLED_MASK = 0x1UL,
BNXT_RE_PPP_STATE_MASK = 0x2UL,
};
enum bnxt_re_ppp_st_shift {
BNXT_RE_PPP_ST_SHIFT = 0x1UL
};
struct bnxt_re_db_hdr {
__u64 typ_qid_indx; /* typ: 4, qid:20 (qid:12 on V3), indx:24 */
};
#define BNXT_RE_CHIP_ID0_CHIP_NUM_SFT 0x00
#define BNXT_RE_CHIP_ID0_CHIP_REV_SFT 0x10
#define BNXT_RE_CHIP_ID0_CHIP_MET_SFT 0x18
#define BNXT_RE_STATIC_WQE_MAX_SGE 0x06
#define BNXT_RE_WQE_MODES_WQE_MODE_MASK 0x01
struct bnxt_re_bcqe {
__u32 flg_st_typ_ph;
__u32 qphi_rwrid; /* This field becomes opaque in V3 */
} __attribute__((packed));
struct bnxt_re_req_cqe {
__u64 qp_handle;
__u32 con_indx; /* 16 bits valid. */
__u32 rsvd1;
__u64 rsvd2;
} __attribute__((packed));
struct bnxt_re_rc_cqe {
__u32 length;
__u32 imm_key;
__u64 qp_handle;
__u64 mr_handle;
} __attribute__((packed));
struct bnxt_re_ud_cqe {
__u32 length; /* 14 bits */
__u32 immd;
__u64 qp_handle;
__u64 qplo_mac; /* 16:48*/
} __attribute__((packed));
struct bnxt_re_term_cqe {
__u64 qp_handle;
__u32 rq_sq_cidx;
__u32 rsvd;
__u64 rsvd1;
} __attribute__((packed));
struct bnxt_re_cutoff_cqe {
__u64 rsvd1;
__u64 rsvd2;
__u64 rsvd3;
__u8 cqe_type_toggle;
__u8 status;
__u16 rsvd4;
__u32 rsvd5;
} __attribute__((packed));
union lower_shdr {
__u64 qkey_len;
__u64 lkey_plkey;
__u64 rva;
};
struct bnxt_re_bsqe {
__u32 rsv_ws_fl_wt;
__u32 key_immd;
union lower_shdr lhdr;
} __attribute__((packed));
struct bnxt_re_psns_ext {
__u32 opc_spsn;
__u32 flg_npsn;
__u16 st_slot_idx;
__u16 rsvd0;
__u32 rsvd1;
} __attribute__((packed));
/* sq_msn_search (size:64b/8B) */
struct bnxt_re_msns {
__u64 start_idx_next_psn_start_psn;
} __attribute__((packed));
struct bnxt_re_psns {
__u32 opc_spsn;
__u32 flg_npsn;
} __attribute__((packed));
struct bnxt_re_sge {
__u64 pa;
__u32 lkey;
__u32 length;
} __attribute__((packed));
struct bnxt_re_send {
__u32 dst_qp;
__u32 avid;
__u64 rsvd;
} __attribute__((packed));
struct bnxt_re_raw {
__u32 cfa_meta;
__u32 ts; /* timestamp for V3 */
__u64 rsvd3; /* timestamp for V1/V2 */
} __attribute__((packed));
struct bnxt_re_rdma {
__u64 rva;
__u32 rkey;
__u32 ts; /* timestamp for V3 */
} __attribute__((packed));
struct bnxt_re_atomic {
__u64 swp_dt;
__u64 cmp_dt;
} __attribute__((packed));
struct bnxt_re_inval {
__u64 rsvd[2];
} __attribute__((packed));
struct bnxt_re_bind {
__u64 va;
__u64 len; /* only 40 bits are valid for V1/V2. Full 64-bit for V3 */
} __attribute__((packed));
struct bnxt_re_brqe {
__u32 rsv_ws_fl_wt;
__u32 opaque; /* opaque is V3 only */
__u32 wrid; /* wrid is V1/V2 only */
__u32 rsvd1;
} __attribute__((packed));
/* V1/V2 only. For V3, sge immediately follows struct bnxt_re_brqe */
struct bnxt_re_rqe {
__u64 rsvd[2];
} __attribute__((packed));
/* SRQ */
struct bnxt_re_srqe {
__u64 rsvd[2];
} __attribute__((packed));
struct bnxt_re_push_wqe {
__u64 addr[32];
} __attribute__((packed));
#ifdef __cplusplus
}
#endif
#endif
+2 -9
Просмотреть файл
@@ -26,17 +26,10 @@
#define LIBRARY_SRC_GDA_BNXT_GDA_PROVIDER_HPP_
extern "C" {
#include <infiniband/bnxt_re_dv.h>
#include <infiniband/bnxt_re_hsi.h>
#include "gda/bnxt/bnxt_re_dv.h"
#include "gda/bnxt/bnxt_re_hsi.h"
}
#define GDA_DEFAULT_GID 3
#define GDA_MAX_ATOMIC 1
#define GDA_OP_RDMA_WRITE BNXT_RE_WR_OPCD_RDMA_WRITE
#define GDA_OP_RDMA_READ BNXT_RE_WR_OPCD_RDMA_READ
#define GDA_OP_ATOMIC_FA BNXT_RE_WR_OPCD_ATOMIC_FA
#define GDA_OP_ATOMIC_CS BNXT_RE_WR_OPCD_ATOMIC_CS
#define bnxt_re_get_cqe_sz() (sizeof(struct bnxt_re_req_cqe) + \
sizeof(struct bnxt_re_bcqe))
+1 -1
Просмотреть файл
@@ -267,7 +267,7 @@ __device__ void QueuePair::post_wqe_rma(int pe, int32_t length, uintptr_t *laddr
uint32_t rma_slots = 3; // (Three slots: hdr, rdma, sge)
inline_msg = length <= inline_threshold &&
opcode == GDA_OP_RDMA_WRITE;
opcode == gda_op_rdma_write;
hdr_ptr = (struct bnxt_re_bsqe*) bnxt_re_get_hwqe(&sq, 0);
rdma_ptr = (struct bnxt_re_rdma*) bnxt_re_get_hwqe(&sq, 1);
+5 -5
Просмотреть файл
@@ -99,7 +99,7 @@ __device__ void GDAContext::amo_add(void *dst, T value, int pe) {
uint8_t lane = __ffsll((unsigned long long)turns) - 1;
int pe_turn = __shfl(pe, lane);
if (pe_turn == pe) {
qps[pe].atomic_nofetch(base_heap[pe] + L_offset, value, 0, pe, GDA_OP_ATOMIC_FA);
qps[pe].atomic_nofetch(base_heap[pe] + L_offset, value, 0, pe);
need_turn = false;
}
turns = __ballot(need_turn);
@@ -113,7 +113,7 @@ __device__ void GDAContext::amo_set(void *dst, T value, int pe) {
T ret_val;
T cond = 0;
for (int i = 0; i < WF_SIZE; i++) { //TODO: this looks wrong
while ((ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, cond, pe, GDA_OP_ATOMIC_CS))) {
while ((ret_val = qps[pe].atomic_cas(base_heap[pe] + L_offset, value, cond, pe))) {
if (ret_val == cond) { break; }
cond = ret_val;
}
@@ -171,7 +171,7 @@ __device__ void GDAContext::amo_cas(void *dst, T value, T cond, int pe) {
if constexpr (sizeof(T) != 8) { printf("rocshmem::gda:amo_cas not implemented for non-64bit types.\n"); abort(); }//TODO:support for non-uint64t
uint64_t L_offset = reinterpret_cast<char *>(dst) - base_heap[my_pe];
for (int i = 0; i < WF_SIZE; i++) { //TODO: this looks wrong
qps[pe].atomic_nofetch(base_heap[pe] + L_offset, value, cond, pe, GDA_OP_ATOMIC_CS);
qps[pe].atomic_cas_nofetch(base_heap[pe] + L_offset, value, cond, pe);
}
}
@@ -186,7 +186,7 @@ __device__ T GDAContext::amo_fetch_add(void *dst, T value, int pe) {
uint8_t lane = __ffsll((unsigned long long)turns) - 1;
int pe_turn = __shfl(pe, lane);
if (pe_turn == pe) {
ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, 0, pe, GDA_OP_ATOMIC_FA);
ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, 0, pe);
need_turn = false;
}
turns = __ballot(need_turn);
@@ -200,7 +200,7 @@ __device__ T GDAContext::amo_fetch_cas(void *dst, T value, T cond, int pe) {
uint64_t L_offset = reinterpret_cast<char *>(dst) - base_heap[my_pe];
T ret_val;
for (int i = 0; i < WF_SIZE; i++) {
ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, cond, pe, GDA_OP_ATOMIC_CS);
ret_val = qps[pe].atomic_cas(base_heap[pe] + L_offset, value, cond, pe);
}
return ret_val;
}
-5
Просмотреть файл
@@ -30,11 +30,8 @@ static void dump_ibv_device(struct ibv_device *x);
static void dump_ibv_pd(struct ibv_pd *x);
static void dump_ibv_port_attr(struct ibv_port_attr *x);
static void dump_ibv_qp(struct ibv_qp *qp, int conn_num);
#if defined(GDA_MLX5)
static void dump_mlx5dv_qp(struct mlx5dv_qp *qp_dv, int conn_num);
static void dump_mlx5dv_cq(struct mlx5dv_cq *cq_dv, int conn_num);
#endif
static void dump_ibv_context(struct ibv_context* x) {
/*
@@ -194,7 +191,6 @@ void dump_ibv_qp(struct ibv_qp *qp, int conn_num) {
DPRINTF("=========== QP_DUMP_END CONNECTION#%d ========\n", conn_num);
}
#if defined(GDA_MLX5)
void dump_mlx5dv_qp(struct mlx5dv_qp *qp_dv, int conn_num) {
DPRINTF("\n");
DPRINTF("===============================================\n");
@@ -235,6 +231,5 @@ void dump_mlx5dv_cq(struct mlx5dv_cq *cq_dv, int conn_num) {
DPRINTF(" (uint64_t) comp_mask = 0x%lx\n", cq_dv->comp_mask);
DPRINTF("================== CQ_DUMP_END ================\n");
}
#endif // GDA_MLX5
#endif /* LIBRARY_SRC_GDA_DEBUG_GDA_HPP_ */
+39
Просмотреть файл
@@ -0,0 +1,39 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
#define LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
#ifdef GDA_IONIC
extern "C" {
#include <infiniband/ionic_dv.h>
#include <infiniband/ionic_fw.h>
}
#endif
#define SPIN_LOCK_INVALID 0xdead
#define SPIN_LOCK_UNLOCKED 0x1234
#define SPIN_LOCK_LOCKED 0xabcd
#endif //LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+37
Просмотреть файл
@@ -0,0 +1,37 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_
#define LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_
extern "C" {
#include "gda/mlx5/mlx5dv.h"
}
typedef union db_reg {
uint64_t *ptr;
uintptr_t uint;
} db_reg_t;
#endif //LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_
+43 -17
Просмотреть файл
@@ -28,15 +28,13 @@
#include "backend_gda.hpp"
#include "endian.hpp"
#if defined(GDA_MLX5)
#include "segment_builder.hpp"
#endif
#include "util.hpp"
#include "constants.hpp"
namespace rocshmem {
QueuePair::QueuePair(struct ibv_pd* pd) {
QueuePair::QueuePair(struct ibv_pd* pd, int gda_vendor) {
int access = IBV_ACCESS_LOCAL_WRITE
| IBV_ACCESS_REMOTE_WRITE
| IBV_ACCESS_REMOTE_READ
@@ -56,17 +54,35 @@ QueuePair::QueuePair(struct ibv_pd* pd) {
mr_fetching_atomic = ibv_reg_mr(pd, fetching_atomic, 8 * FETCHING_ATOMIC_CNT, access);
CHECK_NNULL(mr_fetching_atomic, "ibv_reg_mr");
#if defined(GDA_MLX5)
nonfetching_atomic_lkey = htobe32(mr_nonfetching_atomic->lkey);
fetching_atomic_lkey = htobe32(mr_fetching_atomic->lkey);
#else
nonfetching_atomic_lkey = mr_nonfetching_atomic->lkey;
fetching_atomic_lkey = mr_fetching_atomic->lkey;
#endif
if (gda_vendor == GDAVendor::MLX5) {
nonfetching_atomic_lkey = htobe32(mr_nonfetching_atomic->lkey);
fetching_atomic_lkey = htobe32(mr_fetching_atomic->lkey);
} else {
nonfetching_atomic_lkey = mr_nonfetching_atomic->lkey;
fetching_atomic_lkey = mr_fetching_atomic->lkey;
}
for(int i{0}; i < FETCHING_ATOMIC_CNT; i+=WF_SIZE) {
fetching_atomic_freelist->push_back(fetching_atomic + i);
}
/* Set Correct opcodes for each NIC */
#ifdef GDA_IONIC
gda_op_rdma_write = IONIC_V2_OP_RDMA_WRITE;
gda_op_atomic_fa = IONIC_V2_OP_ATOMIC_FA;
gda_op_atomic_cs = IONIC_V2_OP_ATOMIC_CS;
#endif
if (gda_vendor == GDAVendor::BNXT) {
gda_op_rdma_write = BNXT_RE_WR_OPCD_RDMA_WRITE;
gda_op_rdma_read = BNXT_RE_WR_OPCD_RDMA_READ;
gda_op_atomic_fa = BNXT_RE_WR_OPCD_ATOMIC_FA;
gda_op_atomic_cs = BNXT_RE_WR_OPCD_ATOMIC_CS;
} else if (gda_vendor == GDAVendor::MLX5) {
gda_op_rdma_write = MLX5_OPCODE_RDMA_WRITE;
gda_op_rdma_read = MLX5_OPCODE_RDMA_READ;
gda_op_atomic_fa = MLX5_OPCODE_ATOMIC_FA;
gda_op_atomic_cs = MLX5_OPCODE_ATOMIC_CS;
}
}
QueuePair::~QueuePair() {
@@ -447,7 +463,7 @@ __device__ void QueuePair::post_wqe_rma(int pe, int32_t size, uintptr_t *laddr,
seg_build.update_ctrl_seg(my_sq_counter, opcode, 0, qp_num, MLX5_WQE_CTRL_CQ_UPDATE, 3, 0, 0);
seg_build.update_raddr_seg(raddr, rkey);
if (size <= inline_threshold && opcode == GDA_OP_RDMA_WRITE) {
if (size <= inline_threshold && opcode == gda_op_rdma_write) {
seg_build.update_inl_data_seg(laddr, size);
} else {
seg_build.update_data_seg(laddr, size, lkey);
@@ -629,23 +645,33 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t size, uintptr_t *rad
__device__ void QueuePair::put_nbi(void *dest, const void *source, size_t nelems, int pe) {
uintptr_t *src = reinterpret_cast<uintptr_t*>(const_cast<void*>(source));
uintptr_t *dst = reinterpret_cast<uintptr_t*>(dest);
post_wqe_rma(pe, nelems, src, dst, GDA_OP_RDMA_WRITE);
post_wqe_rma(pe, nelems, src, dst, gda_op_rdma_write);
}
__device__ void QueuePair::get_nbi(void *dest, const void *source, size_t nelems, int pe) {
uintptr_t *src = reinterpret_cast<uintptr_t*>(const_cast<void*>(source));
uintptr_t *dst = reinterpret_cast<uintptr_t*>(dest);
post_wqe_rma(pe, nelems, dst, src, GDA_OP_RDMA_READ);
post_wqe_rma(pe, nelems, dst, src, gda_op_rdma_read);
}
__device__ int64_t QueuePair::atomic_fetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe, uint8_t atomic_op) {
__device__ int64_t QueuePair::atomic_cas(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) {
uintptr_t *dst = reinterpret_cast<uintptr_t*>(dest);
return post_wqe_amo(pe, sizeof(int64_t), dst, atomic_op, atomic_data, atomic_cmp, true);
return post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_cs, atomic_data, atomic_cmp, true);
}
__device__ void QueuePair::atomic_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe, uint8_t atomic_op) {
__device__ int64_t QueuePair::atomic_cas_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) {
uintptr_t *dst = reinterpret_cast<uintptr_t*>(dest);
post_wqe_amo(pe, sizeof(int64_t), dst, atomic_op, atomic_data, atomic_cmp, false);
return post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_cs, atomic_data, atomic_cmp, false);
}
__device__ int64_t QueuePair::atomic_fetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) {
uintptr_t *dst = reinterpret_cast<uintptr_t*>(dest);
return post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_fa, atomic_data, atomic_cmp, true);
}
__device__ void QueuePair::atomic_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) {
uintptr_t *dst = reinterpret_cast<uintptr_t*>(dest);
post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_fa, atomic_data, atomic_cmp, false);
}
} // namespace rocshmem
+40 -42
Просмотреть файл
@@ -37,48 +37,18 @@
#include "rocshmem_config.h"
#include "endian.h"
#include "constants.hpp"
#ifdef GDA_IONIC
extern "C" {
#include <infiniband/ionic_dv.h>
#include <infiniband/ionic_fw.h>
}
#elif defined(GDA_BNXT)
#include "bnxt/provider_gda_bnxt.hpp"
#elif defined(GDA_MLX5)
#include <infiniband/mlx5dv.h>
#else
#error "Please select an RDMA provider"
#endif
#include "gda/ionic/provider_gda_ionic.hpp"
#include "gda/mlx5/provider_gda_mlx5.hpp"
#include "gda/bnxt/provider_gda_bnxt.hpp"
#include "containers/free_list.hpp"
#include "memory/hip_allocator.hpp"
#ifdef GDA_IONIC
#define GDA_MAX_ATOMIC 15
#define GDA_OP_RDMA_WRITE IONIC_V2_OP_RDMA_WRITE
#define GDA_OP_ATOMIC_FA IONIC_V2_OP_ATOMIC_FA
#define GDA_OP_ATOMIC_CS IONIC_V2_OP_ATOMIC_CS
#elif defined(GDA_MLX5)
#define GDA_MAX_ATOMIC 1
#define GDA_OP_RDMA_WRITE MLX5_OPCODE_RDMA_WRITE
#define GDA_OP_RDMA_READ MLX5_OPCODE_RDMA_READ
#define GDA_OP_ATOMIC_FA MLX5_OPCODE_ATOMIC_FA
#define GDA_OP_ATOMIC_CS MLX5_OPCODE_ATOMIC_CS
#endif
namespace rocshmem {
class GDABackend;
typedef union db_reg {
uint64_t *ptr;
uintptr_t uint;
} db_reg_t;
#define SPIN_LOCK_INVALID 0xdead
#define SPIN_LOCK_UNLOCKED 0x1234
#define SPIN_LOCK_LOCKED 0xabcd
class QueuePair {
public:
friend GDABackend;
@@ -86,7 +56,7 @@ class QueuePair {
/**
* @brief Constructor.
*/
explicit QueuePair(struct ibv_pd* pd);
explicit QueuePair(struct ibv_pd* pd, int gda_vendor);
/**
* @brief Destructor.
@@ -125,11 +95,10 @@ class QueuePair {
* @param[in] value Data value for the atomic operation.
* @param[in] cond Used in atomic comparisons.
* @param[in] pe Destination processing element of data transmission.
* @param[in] atomic_op The atomic operation to perform.
*
* @return An atomic value
*/
__device__ int64_t atomic_fetch(void *dest, int64_t value, int64_t cond, int pe, uint8_t atomic_op);
__device__ int64_t atomic_fetch(void *dest, int64_t value, int64_t cond, int pe);
/**
* @brief Create and enqueue an atomic fetch work queue entry (wqe).
@@ -138,9 +107,30 @@ class QueuePair {
* @param[in] value Data value for the atomic operation.
* @param[in] cond Used in atomic comparisons.
* @param[in] pe Destination processing element of data transmission.
* @param[in] atomic_op The atomic operation to perform.
*/
__device__ void atomic_nofetch(void *dest, int64_t value, int64_t cond, int pe, uint8_t atomic_op);
__device__ void atomic_nofetch(void *dest, int64_t value, int64_t cond, int pe);
/**
* @brief Create and enqueue an atomic cas work queue entry (wqe).
*
* @param[in] dest Destination address for data transmission.
* @param[in] value Data value for the atomic operation.
* @param[in] cond Used in atomic comparisons.
* @param[in] pe Destination processing element of data transmission.
*
* @return An atomic value
*/
__device__ int64_t atomic_cas(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe);
/**
* @brief Create and enqueue an atomic cas work queue entry (wqe).
*
* @param[in] dest Destination address for data transmission.
* @param[in] value Data value for the atomic operation.
* @param[in] cond Used in atomic comparisons.
* @param[in] pe Destination processing element of data transmission.
*/
__device__ int64_t atomic_cas_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe);
char *const *base_heap{nullptr};
@@ -233,14 +223,18 @@ class QueuePair {
uint32_t sq_dbprod{0};
uint32_t sq_prod{0};
uint32_t sq_msn{0};
#endif
#elif defined(GDA_BNXT)
/* GDAVendor::BNXT START */
uint64_t *dbr;
struct bnxt_device_cq cq;
struct bnxt_device_sq sq;
__device__ int poll_cq();
#else // GDA_MLX5
/* GDAVendor::BNXT END */
/* GDAVendor::MLX5 START */
db_reg_t db{};
@@ -301,7 +295,7 @@ class QueuePair {
static constexpr size_t OUTSTANDING_TABLE_SIZE = 65536;
uint64_t outstanding_wqes[OUTSTANDING_TABLE_SIZE]{0};
#endif // GDA_IONIC
/* GDAVendor::MLX5 END */
uint32_t inline_threshold{0};
@@ -325,6 +319,10 @@ class QueuePair {
HIPAllocator allocator{};
uint8_t gda_op_rdma_write;
uint8_t gda_op_rdma_read;
uint8_t gda_op_atomic_fa;
uint8_t gda_op_atomic_cs;
};
} // namespace rocshmem
+1 -1
Просмотреть файл
@@ -25,7 +25,7 @@
#ifndef LIBRARY_SRC_GDA_SEGMENT_BUILDER_HPP_
#define LIBRARY_SRC_GDA_SEGMENT_BUILDER_HPP_
#include <infiniband/mlx5dv.h>
#include "gda/mlx5/provider_gda_mlx5.hpp"
#include "util.hpp"