Runtime selection of IONIC (#272)
* Split ionic code to a subdirectory; dyld libionicl; move the fntable to provider_gda_xxx.hpp
pass the pattr to ionic_setup_pd, include endian.hpp
Enable building IONIC conduit for runtime selection
* Uniform style for the fntable between ionic and the rest
* Move mlx5 gda conduit to a subdir; resolve conflict with backend_can_run
function
* Don't forget to init qp for ionic, move mlx5 specialized init qp code to
the mlx5 subdir
* Don't add cmakecaches...
Typo: GDA_BXNT
* Add gda-ionic to all_backends build scripts
* Apply suggestion from reviews
Co-authored-by: Omri Mor <omri50@gmail.com>
Co-authored-by: Edgar Gabriel <edgar.gabriel@amd.com>
* Remove duplicate definitiion of DLSYM macros
---------
Co-authored-by: Omri Mor <omri50@gmail.com>
Co-authored-by: Edgar Gabriel <edgar.gabriel@amd.com>
[ROCm/rocshmem commit: 3cfe76522e]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
6c4325d131
Коммит
bb8406b013
@@ -25,7 +25,6 @@
|
||||
find_package(PkgConfig QUIET)
|
||||
if (PkgConfig_FOUND)
|
||||
if (IBVerbs_ROOT )
|
||||
# We don't use IBVerbs_DIR as this is supposed to be used when finding hwloc-config.cmake only
|
||||
set(ENV{PKG_CONFIG_PATH} "${IBVerbs_ROOT}/lib/pkgconfig:$ENV{PKG_CONFIG_PATH}")
|
||||
endif()
|
||||
pkg_check_modules(PC_IBVerbs QUIET libibverbs)
|
||||
@@ -42,32 +41,11 @@ find_library(IBVerbs_LIBRARY
|
||||
PATH_SUFFIXES lib lib64
|
||||
)
|
||||
|
||||
if (GDA_IONIC)
|
||||
list(APPEND provider_vars IBVerbs_IONIC_LIBRARY IBVerbs_IONIC_INCLUDE_DIR)
|
||||
find_path(IBVerbs_IONIC_INCLUDE_DIR infiniband/ionic_dv.h
|
||||
HINTS ${PC_IBVerbs_INCLUDEDIR} ${PC_IBVerbs_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES include
|
||||
)
|
||||
|
||||
find_library(IBVerbs_IONIC_LIBRARY
|
||||
NAMES ionic libionic
|
||||
HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS}
|
||||
PATH_SUFFIXES lib lib64
|
||||
)
|
||||
|
||||
add_library(IBVerbs::verbs_ionic UNKNOWN IMPORTED)
|
||||
set_target_properties(IBVerbs::verbs_ionic PROPERTIES
|
||||
IMPORTED_LOCATION "${IBVerbs_IONIC_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_IONIC_INCLUDE_DIR}"
|
||||
)
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args(IBVerbs DEFAULT_MSG
|
||||
IBVerbs_LIBRARY
|
||||
IBVerbs_INCLUDE_DIR
|
||||
${provider_vars}
|
||||
)
|
||||
mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR ${provider_vars})
|
||||
mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR)
|
||||
|
||||
if (IBVerbs_FOUND)
|
||||
add_library(IBVerbs::verbs UNKNOWN IMPORTED)
|
||||
@@ -76,9 +54,4 @@ set_target_properties(IBVerbs::verbs PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "${PC_IBVerbs_CFLAGS_OTHER}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_INCLUDE_DIR}"
|
||||
)
|
||||
|
||||
target_link_libraries(IBVerbs::verbs INTERFACE
|
||||
$<TARGET_NAME_IF_EXISTS:IBVerbs::verbs_ionic>
|
||||
)
|
||||
|
||||
endif()
|
||||
|
||||
@@ -40,7 +40,7 @@ cmake \
|
||||
-DUSE_GDA=ON \
|
||||
-DGDA_MLX5=ON \
|
||||
-DGDA_BNXT=ON \
|
||||
-DGDA_IONIC=OFF \
|
||||
-DGDA_IONIC=ON \
|
||||
-DUSE_RO=ON \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_THREADS=OFF \
|
||||
|
||||
@@ -35,7 +35,6 @@ target_sources(
|
||||
gda_team.cpp
|
||||
queue_pair.cpp
|
||||
endian.cpp
|
||||
segment_builder.cpp
|
||||
topology.cpp
|
||||
)
|
||||
|
||||
@@ -48,4 +47,6 @@ target_link_libraries(
|
||||
numa
|
||||
)
|
||||
|
||||
add_subdirectory(mlx5)
|
||||
add_subdirectory(bnxt)
|
||||
add_subdirectory(ionic)
|
||||
|
||||
@@ -122,11 +122,7 @@ GDABackend::~GDABackend() {
|
||||
cleanup_heap_memory_rkey();
|
||||
cleanup_ibv();
|
||||
|
||||
if (bnxtdv_handle_ != nullptr)
|
||||
dlclose(bnxtdv_handle_);
|
||||
|
||||
if (mlx5dv_handle_ != nullptr)
|
||||
dlclose(mlx5dv_handle_);
|
||||
close_dv_libs();
|
||||
}
|
||||
|
||||
void GDABackend::read_env() {
|
||||
@@ -532,52 +528,45 @@ void GDABackend::rte_barrier() {
|
||||
}
|
||||
}
|
||||
|
||||
int GDABackend::mlx5_dv_dl_init () {
|
||||
mlx5dv_handle_ = dlopen("libmlx5.so", RTLD_NOW);
|
||||
if (!mlx5dv_handle_) {
|
||||
DPRINTF("Could not open libmlx5.so. Returning\n");
|
||||
return ROCSHMEM_ERROR;
|
||||
}
|
||||
|
||||
DLSYM_HELPER(mlx5dv_ftable_, mlx5dv_, mlx5dv_handle_, init_obj);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* Currently we only check whether we can dlopen a Direct Verbs library.
|
||||
** We might need to extend this logic to check whether we have interfaces that
|
||||
** can use those DV libraries
|
||||
*/
|
||||
* We might need to extend this logic to check whether we have interfaces that
|
||||
* can use those DV libraries
|
||||
*/
|
||||
int GDABackend::backend_can_run() {
|
||||
void *handle{nullptr};
|
||||
|
||||
/* Try opening bnxt DV libraries */
|
||||
handle = dlopen("libbnxt_re.so", RTLD_NOW);
|
||||
#if defined(GDA_BNXT)
|
||||
handle = bnxt_dv_dlopen();
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
} else {
|
||||
/* Try hard-coded PATH */
|
||||
handle = dlopen("/usr/local/lib/libbnxt_re.so", RTLD_NOW);
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
}
|
||||
#endif //defined(GDA_BNXT)
|
||||
|
||||
/* Try opening ionic DV libraries */
|
||||
#if defined(GDA_IONIC)
|
||||
handle = ionic_dv_dlopen();
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
#endif //defined(GDA_IONIC)
|
||||
|
||||
/* Try opening mlx5 DV libraries */
|
||||
handle = dlopen("libmlx5.so", RTLD_NOW);
|
||||
#if defined(GDA_MLX5)
|
||||
handle = mlx5_dv_dlopen();
|
||||
if (handle) {
|
||||
dlclose(handle);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* ToDo: opening ionic DV libraries */
|
||||
#endif //defined(GDA_MLX5)
|
||||
|
||||
return ROCSHMEM_ERROR;
|
||||
}
|
||||
|
||||
void GDABackend::setup_ibv() {
|
||||
autodetect_dv_libs();
|
||||
open_dv_libs();
|
||||
|
||||
open_ib_device();
|
||||
|
||||
@@ -599,22 +588,22 @@ void GDABackend::cleanup_ibv() {
|
||||
CHECK_HIP(hipHostUnregister(db_region_attr.dbr));
|
||||
|
||||
for (int i = 0; i < qps.size(); i++) {
|
||||
err = bnxtdv_ftable_.destroy_qp(qps[i]);
|
||||
err = bnxt_re_dv.destroy_qp(qps[i]);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_destroy_qp");
|
||||
|
||||
err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.rq_umem_handle);
|
||||
err = bnxt_re_dv.umem_dereg(bnxt_qps[i].attr.rq_umem_handle);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)");
|
||||
|
||||
err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.sq_umem_handle);
|
||||
err = bnxt_re_dv.umem_dereg(bnxt_qps[i].attr.sq_umem_handle);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (SQ)");
|
||||
|
||||
CHECK_HIP(hipFree(bnxt_qps[i].sq_buf));
|
||||
CHECK_HIP(hipFree(bnxt_qps[i].rq_buf));
|
||||
|
||||
err = bnxtdv_ftable_.destroy_cq(cqs[i]);
|
||||
err = bnxt_re_dv.destroy_cq(cqs[i]);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_destroy_cq");
|
||||
|
||||
err = bnxtdv_ftable_.umem_dereg(bnxt_cqs[i].umem_handle);
|
||||
err = bnxt_re_dv.umem_dereg(bnxt_cqs[i].umem_handle);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg");
|
||||
|
||||
CHECK_HIP(hipFree(bnxt_cqs[i].buf));
|
||||
@@ -647,13 +636,14 @@ void GDABackend::cleanup_ibv() {
|
||||
CHECK_ZERO(err, "ibv_close_device");
|
||||
}
|
||||
|
||||
void GDABackend::autodetect_dv_libs() {
|
||||
void GDABackend::open_dv_libs() {
|
||||
int ret;
|
||||
|
||||
#ifdef GDA_IONIC
|
||||
gda_vendor = GDAVendor::IONIC;
|
||||
#endif
|
||||
//TODO: environment variable selection/deselection
|
||||
//this hardcoded init order will always prefer BNXT>IONIC>MLX5
|
||||
//if all three drivers are installed
|
||||
|
||||
#if defined(GDA_BNXT)
|
||||
if (gda_vendor == GDAVendor::NONE) {
|
||||
ret = bnxt_dv_dl_init();
|
||||
|
||||
@@ -663,7 +653,21 @@ void GDABackend::autodetect_dv_libs() {
|
||||
DPRINTF("Initializing rocSHMEM BNXT GDA support failed\n");
|
||||
}
|
||||
}
|
||||
#endif // defined(GDA_BNXT)
|
||||
|
||||
#if defined(GDA_IONIC)
|
||||
if (gda_vendor == GDAVendor::NONE) {
|
||||
ret = ionic_dv_dl_init();
|
||||
|
||||
if (ret == ROCSHMEM_SUCCESS) {
|
||||
gda_vendor = GDAVendor::IONIC;
|
||||
} else {
|
||||
DPRINTF("Initializing rocSHMEM IONIC GDA support failed\n");
|
||||
}
|
||||
}
|
||||
#endif // defined(GDA_IONIC)
|
||||
|
||||
#if defined(GDA_MLX5)
|
||||
if (gda_vendor == GDAVendor::NONE) {
|
||||
ret = mlx5_dv_dl_init();
|
||||
|
||||
@@ -673,13 +677,26 @@ void GDABackend::autodetect_dv_libs() {
|
||||
DPRINTF("Initializing rocSHMEM MLX5 GDA support failed\n");
|
||||
}
|
||||
}
|
||||
#endif // defined(GDA_MLX5)
|
||||
|
||||
if (gda_vendor == GDAVendor::NONE) {
|
||||
printf("Initializing rocSHMEM with IONIC, BNXT, or MLX5 GDA support failed\n");
|
||||
printf("Initializing rocSHMEM with IONIC, BNXT, or MLX5 GDA support failed: no DV library found\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void GDABackend::close_dv_libs() {
|
||||
if (ionicdv_handle_ != nullptr)
|
||||
dlclose(ionicdv_handle_);
|
||||
|
||||
if (bnxtdv_handle_ != nullptr)
|
||||
dlclose(bnxtdv_handle_);
|
||||
|
||||
if (mlx5dv_handle_ != nullptr)
|
||||
dlclose(mlx5dv_handle_);
|
||||
|
||||
gda_vendor = GDAVendor::NONE;
|
||||
}
|
||||
|
||||
void GDABackend::exchange_qp_dest_info() {
|
||||
for (int i = 0; i < qps.size(); i++) {
|
||||
@@ -752,11 +769,7 @@ void GDABackend::setup_gpu_qps() {
|
||||
new (&host_qps[i]) QueuePair(pd_orig, gda_vendor);
|
||||
CHECK_HIP(hipMemcpy(&gpu_qps[i], &host_qps[i], sizeof(QueuePair), hipMemcpyDefault));
|
||||
|
||||
if (gda_vendor == GDAVendor::BNXT) {
|
||||
bnxt_initialize_gpu_qp(&gpu_qps[i], i);
|
||||
} else {
|
||||
initialize_gpu_qp(&gpu_qps[i], i);
|
||||
}
|
||||
initialize_gpu_qp(&gpu_qps[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -844,7 +857,7 @@ void GDABackend::modify_qps_reset_to_init() {
|
||||
|
||||
for (int i =0; i < qps.size() ; i++) {
|
||||
if (gda_vendor == GDAVendor::BNXT) {
|
||||
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
|
||||
err = bnxt_re_dv.modify_qp(qps[i], &attr, attr_mask, 0, 0);
|
||||
} else {
|
||||
err = ibv_modify_qp(qps[i], &attr, attr_mask);
|
||||
}
|
||||
@@ -896,7 +909,7 @@ void GDABackend::modify_qps_init_to_rtr() {
|
||||
}
|
||||
|
||||
if (gda_vendor == GDAVendor::BNXT) {
|
||||
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
|
||||
err = bnxt_re_dv.modify_qp(qps[i], &attr, attr_mask, 0, 0);
|
||||
} else {
|
||||
err = ibv_modify_qp(qps[i], &attr, attr_mask);
|
||||
}
|
||||
@@ -932,7 +945,7 @@ void GDABackend::modify_qps_rtr_to_rts() {
|
||||
attr.sq_psn = dest_info[i].psn;
|
||||
|
||||
if (gda_vendor == GDAVendor::BNXT) {
|
||||
err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0);
|
||||
err = bnxt_re_dv.modify_qp(qps[i], &attr, attr_mask, 0, 0);
|
||||
} else {
|
||||
err = ibv_modify_qp(qps[i], &attr, attr_mask);
|
||||
}
|
||||
@@ -1057,19 +1070,9 @@ void GDABackend::create_parent_domain() {
|
||||
CHECK_NNULL(pd_parent, "ibv_alloc_parent_domain");
|
||||
dump_ibv_pd(pd_parent);
|
||||
|
||||
#ifdef GDA_IONIC
|
||||
ionic_dv_pd_set_sqcmb(pd_parent, false, false, false);
|
||||
ionic_dv_pd_set_rqcmb(pd_parent, false, false, false);
|
||||
|
||||
for (int uxdma_i = 0; uxdma_i < 2; ++uxdma_i) {
|
||||
pd_uxdma[uxdma_i] = ibv_alloc_parent_domain(context, &pattr);
|
||||
CHECK_NNULL(pd_uxdma[uxdma_i], "ibv_alloc_parent_domain (uxdma)");
|
||||
|
||||
ionic_dv_pd_set_sqcmb(pd_uxdma[uxdma_i], false, false, false);
|
||||
ionic_dv_pd_set_rqcmb(pd_uxdma[uxdma_i], false, false, false);
|
||||
ionic_dv_pd_set_udma_mask(pd_uxdma[uxdma_i], 1u << uxdma_i);
|
||||
if (gda_vendor == GDAVendor::IONIC) {
|
||||
ionic_setup_parent_domain(&pattr);
|
||||
}
|
||||
#endif /* GDA_IONIC */
|
||||
}
|
||||
|
||||
void GDABackend::create_cqs(int cqe) {
|
||||
@@ -1099,123 +1102,18 @@ void GDABackend::create_cqs(int cqe) {
|
||||
}
|
||||
|
||||
void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
int hip_dev_id{-1};
|
||||
CHECK_HIP(hipGetDevice(&hip_dev_id));
|
||||
|
||||
#ifdef GDA_IONIC
|
||||
ionic_dv_ctx dvctx;
|
||||
ionic_dv_get_ctx(&dvctx, context);
|
||||
|
||||
void* gpu_db_page = nullptr;
|
||||
rocm_memory_lock_to_fine_grain(dvctx.db_page, 0x1000, &gpu_db_page, hip_dev_id);
|
||||
|
||||
uint64_t *db_page_u64 = reinterpret_cast<uint64_t*>(dvctx.db_page);
|
||||
uint64_t *gpu_db_page_u64 = reinterpret_cast<uint64_t*>(gpu_db_page);
|
||||
|
||||
uint64_t *gpu_db_ptr = &gpu_db_page_u64[dvctx.db_ptr - db_page_u64];
|
||||
|
||||
gpu_db_page = gpu_db_page;
|
||||
gpu_db_cq = &gpu_db_ptr[dvctx.cq_qtype];
|
||||
gpu_db_sq = &gpu_db_ptr[dvctx.sq_qtype];
|
||||
|
||||
uint8_t udma_idx = ionic_dv_qp_get_udma_idx(qps[conn_num]);
|
||||
|
||||
ionic_dv_cq dvcq;
|
||||
ionic_dv_get_cq(&dvcq, cqs[conn_num], udma_idx);
|
||||
|
||||
gpu_qp->cq_dbreg = gpu_db_cq;
|
||||
gpu_qp->cq_dbval = dvcq.q.db_val;
|
||||
gpu_qp->cq_mask = dvcq.q.mask;
|
||||
|
||||
gpu_qp->ionic_cq_buf = reinterpret_cast<ionic_v1_cqe*>(dvcq.q.ptr);
|
||||
|
||||
ionic_dv_qp dvqp;
|
||||
ionic_dv_get_qp(&dvqp, qps[conn_num]);
|
||||
|
||||
gpu_qp->sq_dbreg = gpu_db_sq;
|
||||
gpu_qp->sq_dbval = dvqp.sq.db_val;
|
||||
gpu_qp->sq_mask = dvqp.sq.mask;
|
||||
gpu_qp->ionic_sq_buf = reinterpret_cast<ionic_v1_wqe *>(dvqp.sq.ptr);
|
||||
|
||||
strncpy(gpu_qp->dev_name,
|
||||
qps[conn_num]->context->device->name,
|
||||
sizeof(gpu_qp->dev_name));
|
||||
gpu_qp->dev_name[sizeof(gpu_qp->dev_name) - 1] = 0;
|
||||
|
||||
gpu_qp->qp_num = qps[conn_num]->qp_num;
|
||||
gpu_qp->lkey = heap_mr->lkey;
|
||||
gpu_qp->rkey = heap_rkey[conn_num % num_pes];
|
||||
gpu_qp->inline_threshold = 32;
|
||||
#endif /* GDA_IONIC */
|
||||
if (gda_vendor == GDAVendor::MLX5) {
|
||||
mlx5dv_cq cq_out;
|
||||
mlx5dv_obj mlx_obj;
|
||||
mlx_obj.cq.in = cqs[conn_num];
|
||||
mlx_obj.cq.out = &cq_out;
|
||||
mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_CQ);
|
||||
dump_mlx5dv_cq(&cq_out, conn_num);
|
||||
|
||||
/*
|
||||
* struct mlx5dv_cq {
|
||||
* void *buf;
|
||||
* __be32 *dbrec;
|
||||
* uint32_t cqe_cnt;
|
||||
* uint32_t cqe_size;
|
||||
* void *cq_uar;
|
||||
* uint32_t cqn;
|
||||
* uint64_t comp_mask;
|
||||
* };
|
||||
*/
|
||||
|
||||
gpu_qp->cq_buf = reinterpret_cast<mlx5_cqe64*>(cq_out.buf);
|
||||
gpu_qp->cq_cnt = cq_out.cqe_cnt;
|
||||
gpu_qp->cq_log_cnt = log2(cq_out.cqe_cnt);
|
||||
gpu_qp->cq_dbrec = cq_out.dbrec;
|
||||
|
||||
mlx5dv_qp qp_out;
|
||||
mlx_obj.qp.in = qps[conn_num];
|
||||
mlx_obj.qp.out = &qp_out;
|
||||
mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_QP);
|
||||
dump_mlx5dv_qp(&qp_out, conn_num);
|
||||
|
||||
/*
|
||||
* struct mlx5dv_qp {
|
||||
* __be32 *dbrec;
|
||||
* struct {
|
||||
* void *buf;
|
||||
* uint32_t wqe_cnt;
|
||||
* uint32_t stride;
|
||||
* } sq;
|
||||
* struct {
|
||||
* void *buf;
|
||||
* uint32_t wqe_cnt;
|
||||
* uint32_t stride;
|
||||
* } rq;
|
||||
* struct {
|
||||
* void *reg;
|
||||
* uint32_t size;
|
||||
* } bf;
|
||||
* uint64_t comp_mask;
|
||||
* off_t uar_mmap_offset;
|
||||
* uint32_t tirn;
|
||||
* uint32_t tisn;
|
||||
* uint32_t rqn;
|
||||
* uint32_t sqn;
|
||||
* uint64_t tir_icm_addr;
|
||||
* };
|
||||
*/
|
||||
|
||||
gpu_qp->dbrec = &qp_out.dbrec[1]; // points to two pointers: 0 -> MLX5_REC_DBR, 1 -> MLX5_SND_DBR
|
||||
gpu_qp->sq_buf = reinterpret_cast<uint64_t*>(qp_out.sq.buf);
|
||||
gpu_qp->sq_wqe_cnt = qp_out.sq.wqe_cnt;
|
||||
gpu_qp->rkey = htobe32(heap_rkey[conn_num % num_pes]);
|
||||
gpu_qp->lkey = htobe32(heap_mr->lkey);
|
||||
gpu_qp->qp_num = qps[conn_num]->qp_num;
|
||||
gpu_qp->inline_threshold = inline_threshold;
|
||||
// The 2 in qp_out.bf.size * 2 below facilitates the switching between blue flame registers
|
||||
void* gpu_ptr{nullptr};
|
||||
rocm_memory_lock_to_fine_grain(qp_out.bf.reg, qp_out.bf.size * 2, &gpu_ptr, hip_dev_id);
|
||||
gpu_qp->db.ptr = reinterpret_cast<uint64_t*>(gpu_ptr);
|
||||
switch (gda_vendor) {
|
||||
case GDAVendor::IONIC:
|
||||
ionic_initialize_gpu_qp(gpu_qp, conn_num);
|
||||
break;
|
||||
case GDAVendor::BNXT:
|
||||
bnxt_initialize_gpu_qp(gpu_qp, conn_num);
|
||||
break;
|
||||
case GDAVendor::MLX5:
|
||||
mlx5_initialize_gpu_qp(gpu_qp, conn_num);
|
||||
break;
|
||||
default:
|
||||
assert(false /* GDAVendor initialize_gpu_qp */);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,32 +40,6 @@
|
||||
#include "gda/bnxt/provider_gda_bnxt.hpp"
|
||||
#include "gda/mlx5/provider_gda_mlx5.hpp"
|
||||
|
||||
struct bnxtdv_funcs_t {
|
||||
int (*init_obj)(struct bnxt_re_dv_obj *obj, uint64_t obj_type);
|
||||
struct ibv_qp* (*create_qp)(struct ibv_pd *pd,
|
||||
struct bnxt_re_dv_qp_init_attr *qp_attr);
|
||||
int (*destroy_qp)(struct ibv_qp *ibvqp);
|
||||
int (*modify_qp)(struct ibv_qp *ibv_qp, struct ibv_qp_attr *attr,
|
||||
int attr_mask, uint32_t type, uint32_t value);
|
||||
int (*qp_mem_alloc)(struct ibv_pd *ibvpd,
|
||||
struct ibv_qp_init_attr *attr,
|
||||
struct bnxt_re_dv_qp_mem_info *dv_qp_mem);
|
||||
struct ibv_cq* (*create_cq)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_cq_init_attr *cq_attr);
|
||||
int (*destroy_cq)(struct ibv_cq *ibv_cq);
|
||||
void* (*cq_mem_alloc)(struct ibv_context *ibvctx, int num_cqe,
|
||||
struct bnxt_re_dv_cq_attr *cq_attr);
|
||||
void* (*umem_reg)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_umem_reg_attr *in);
|
||||
int (*umem_dereg)(void *umem_handle);
|
||||
int (*get_default_db_region)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_db_region_attr *out);
|
||||
};
|
||||
|
||||
struct mlx5dv_funcs_t {
|
||||
int (*init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
|
||||
};
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
class GDAContext;
|
||||
@@ -325,6 +299,8 @@ class GDABackend : public Backend {
|
||||
|
||||
void initialize_gpu_qp(QueuePair* qp, int conn_num);
|
||||
void bnxt_initialize_gpu_qp(QueuePair* qp, int conn_num);
|
||||
void ionic_initialize_gpu_qp(QueuePair* qp, int conn_num);
|
||||
void mlx5_initialize_gpu_qp(QueuePair* qp, int conn_num);
|
||||
|
||||
/**
|
||||
* @brief Setup InfiniBand Resources
|
||||
@@ -337,9 +313,14 @@ class GDABackend : public Backend {
|
||||
void cleanup_ibv();
|
||||
|
||||
/**
|
||||
* @brief Detect the available direct verbs libraries
|
||||
* @brief Detect and load the available direct verbs libraries
|
||||
*/
|
||||
void autodetect_dv_libs();
|
||||
void open_dv_libs();
|
||||
|
||||
/**
|
||||
* @ brief Close opened direct verbs libraries
|
||||
*/
|
||||
void close_dv_libs();
|
||||
|
||||
/**
|
||||
* @brief Open InfiniBand Device and create common structures
|
||||
@@ -404,6 +385,7 @@ class GDABackend : public Backend {
|
||||
static void pd_release(ibv_pd* pd, void* pd_context, void* ptr, uint64_t resource_type);
|
||||
|
||||
void create_parent_domain();
|
||||
void ionic_setup_parent_domain(struct ibv_parent_domain_init_attr* pattr);
|
||||
|
||||
void setup_gpu_qps();
|
||||
void cleanup_gpu_qps();
|
||||
@@ -509,7 +491,7 @@ class GDABackend : public Backend {
|
||||
* @brief structures holding the function pointers to the direct verbs functionality
|
||||
* of each network driver.
|
||||
*/
|
||||
bnxtdv_funcs_t bnxtdv_ftable_;
|
||||
bnxtdv_funcs_t bnxt_re_dv;
|
||||
|
||||
/**
|
||||
* @brief handle used for the dlopen of the BCOM library
|
||||
@@ -521,11 +503,16 @@ class GDABackend : public Backend {
|
||||
*/
|
||||
int bnxt_dv_dl_init();
|
||||
|
||||
/**
|
||||
* @brief open bnxt dv lib
|
||||
*/
|
||||
static void* bnxt_dv_dlopen();
|
||||
|
||||
/**
|
||||
* @brief structures holding the function pointers to the direct verbs functionality
|
||||
* of each network driver.
|
||||
*/
|
||||
mlx5dv_funcs_t mlx5dv_ftable_;
|
||||
mlx5dv_funcs_t mlx5dv;
|
||||
|
||||
/**
|
||||
* @brief handle used for the dlopen of the MLX5 library
|
||||
@@ -536,6 +523,32 @@ class GDABackend : public Backend {
|
||||
* @brief initialize function table for MLNX direct verbs support
|
||||
*/
|
||||
int mlx5_dv_dl_init();
|
||||
|
||||
/**
|
||||
* @brief open mlx5 dv lib
|
||||
*/
|
||||
static void* mlx5_dv_dlopen();
|
||||
|
||||
/**
|
||||
* @brief structures holding the function pointers to the direct verbs functionality
|
||||
* of each network driver.
|
||||
*/
|
||||
ionicdv_funcs_t ionic_dv;
|
||||
|
||||
/**
|
||||
* @brief handle used for the dlopen of the IONIC library
|
||||
*/
|
||||
void *ionicdv_handle_{nullptr};
|
||||
|
||||
/**
|
||||
* @brief initialize function table for IONIC direct verbs support
|
||||
*/
|
||||
int ionic_dv_dl_init();
|
||||
|
||||
/**
|
||||
* @brief open ionic dv lib
|
||||
*/
|
||||
static void* ionic_dv_dlopen();
|
||||
};
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
@@ -42,7 +42,7 @@ void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
dv_obj.cq.in = cqs[conn_num];
|
||||
dv_obj.cq.out = &dv_cq;
|
||||
|
||||
err = bnxtdv_ftable_.init_obj(&dv_obj, BNXT_RE_DV_OBJ_CQ);
|
||||
err = bnxt_re_dv.init_obj(&dv_obj, BNXT_RE_DV_OBJ_CQ);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_init_obj(CQ)");
|
||||
|
||||
memset(&gpu_qp->cq, 0, sizeof(bnxt_device_cq));
|
||||
@@ -56,7 +56,7 @@ void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
dv_obj.qp.in = ib_qp;
|
||||
dv_obj.qp.out = &dv_qp;
|
||||
|
||||
err = bnxtdv_ftable_.init_obj(&dv_obj, BNXT_RE_DV_OBJ_QP);
|
||||
err = bnxt_re_dv.init_obj(&dv_obj, BNXT_RE_DV_OBJ_QP);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_init_obj(QP)");
|
||||
|
||||
memset(&gpu_qp->sq, 0, sizeof(bnxt_device_sq));
|
||||
@@ -76,7 +76,7 @@ void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
gpu_qp->sq.mtu = ibv_mtu_to_int(portinfo.active_mtu);
|
||||
|
||||
/* Export DB */
|
||||
err = bnxtdv_ftable_.get_default_db_region(context, &db_region_attr);
|
||||
err = bnxt_re_dv.get_default_db_region(context, &db_region_attr);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_init_obj(QP)");
|
||||
|
||||
CHECK_HIP(hipHostRegister(db_region_attr.dbr, getpagesize(), hipHostRegisterDefault));
|
||||
@@ -98,7 +98,7 @@ void GDABackend::bnxt_create_cqs(int cqe) {
|
||||
for (int i = 0; i < qps.size(); i++) {
|
||||
/* Allocate CQ mem */
|
||||
memset(&cq_attr, 0, sizeof(struct bnxt_re_dv_cq_attr));
|
||||
bnxt_cqs[i].handle = bnxtdv_ftable_.cq_mem_alloc(context, cqe, &cq_attr);
|
||||
bnxt_cqs[i].handle = bnxt_re_dv.cq_mem_alloc(context, cqe, &cq_attr);
|
||||
CHECK_NNULL(bnxt_cqs[i].handle, "bnxt_re_dv_cq_mem_alloc");
|
||||
|
||||
/* Allocate CQ UMEM */
|
||||
@@ -112,7 +112,7 @@ void GDABackend::bnxt_create_cqs(int cqe) {
|
||||
umem_attr.size = bnxt_cqs[i].length;
|
||||
umem_attr.access_flags = IBV_ACCESS_LOCAL_WRITE;
|
||||
|
||||
bnxt_cqs[i].umem_handle = bnxtdv_ftable_.umem_reg(context, &umem_attr);
|
||||
bnxt_cqs[i].umem_handle = bnxt_re_dv.umem_reg(context, &umem_attr);
|
||||
CHECK_NNULL(bnxt_cqs[i].umem_handle, "bnxt_re_dv_umem_reg(cq_buf)");
|
||||
|
||||
/* Create CQ */
|
||||
@@ -121,7 +121,7 @@ void GDABackend::bnxt_create_cqs(int cqe) {
|
||||
cq_init_attr.umem_handle = bnxt_cqs[i].umem_handle;
|
||||
cq_init_attr.ncqe = cq_attr.ncqe;
|
||||
|
||||
cqs[i] = bnxtdv_ftable_.create_cq(context, &cq_init_attr);
|
||||
cqs[i] = bnxt_re_dv.create_cq(context, &cq_init_attr);
|
||||
CHECK_NNULL(cqs[i], "bnxt_re_dv_create_cq");
|
||||
}
|
||||
}
|
||||
@@ -152,7 +152,7 @@ void GDABackend::bnxt_create_qps(int sq_length) {
|
||||
|
||||
/* Alloc qp_mem_info */
|
||||
memset(&bnxt_qps[i].mem_info, 0, sizeof(struct bnxt_re_dv_qp_mem_info));
|
||||
err = bnxtdv_ftable_.qp_mem_alloc(pd_orig, &ib_qp_attr, &bnxt_qps[i].mem_info);
|
||||
err = bnxt_re_dv.qp_mem_alloc(pd_orig, &ib_qp_attr, &bnxt_qps[i].mem_info);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_qp_mem_alloc");
|
||||
|
||||
/* Alloc SQ */
|
||||
@@ -177,7 +177,7 @@ void GDABackend::bnxt_create_qps(int sq_length) {
|
||||
umem_attr.size = bnxt_qps[i].mem_info.sq_len;
|
||||
umem_attr.access_flags = IBV_ACCESS_LOCAL_WRITE;
|
||||
|
||||
sq_umem_handle = bnxtdv_ftable_.umem_reg(context, &umem_attr);
|
||||
sq_umem_handle = bnxt_re_dv.umem_reg(context, &umem_attr);
|
||||
CHECK_NNULL(sq_umem_handle, "bnxt_re_dv_umem_reg(sq)");
|
||||
|
||||
memset(&umem_attr, 0, sizeof(struct bnxt_re_dv_umem_reg_attr));
|
||||
@@ -185,7 +185,7 @@ void GDABackend::bnxt_create_qps(int sq_length) {
|
||||
umem_attr.size = bnxt_qps[i].mem_info.rq_len;
|
||||
umem_attr.access_flags = IBV_ACCESS_LOCAL_WRITE;
|
||||
|
||||
rq_umem_handle = bnxtdv_ftable_.umem_reg(context, &umem_attr);
|
||||
rq_umem_handle = bnxt_re_dv.umem_reg(context, &umem_attr);
|
||||
CHECK_NNULL(rq_umem_handle, "bnxt_re_dv_umem_reg(rq)");
|
||||
|
||||
/* IB DV QP Init Attr */
|
||||
@@ -214,33 +214,40 @@ void GDABackend::bnxt_create_qps(int sq_length) {
|
||||
bnxt_qps[i].attr.comp_mask = bnxt_qps[i].mem_info.comp_mask;
|
||||
|
||||
/* Alloc QP */
|
||||
qps[i] = bnxtdv_ftable_.create_qp(pd_orig, &bnxt_qps[i].attr);
|
||||
qps[i] = bnxt_re_dv.create_qp(pd_orig, &bnxt_qps[i].attr);
|
||||
CHECK_NNULL(qps[i], "bnxt_re_dv_create_qp");
|
||||
}
|
||||
}
|
||||
|
||||
int GDABackend::bnxt_dv_dl_init() {
|
||||
bnxtdv_handle_ = dlopen("libbnxt_re.so", RTLD_NOW);
|
||||
if (!bnxtdv_handle_) {
|
||||
void* GDABackend::bnxt_dv_dlopen() {
|
||||
void* dv_handle{nullptr};
|
||||
dv_handle = dlopen("libbnxt_re.so", RTLD_NOW);
|
||||
if (!dv_handle) {
|
||||
// Try hard-coded PATH
|
||||
bnxtdv_handle_ = dlopen("/usr/local/lib/libbnxt_re.so", RTLD_NOW);
|
||||
if (!bnxtdv_handle_) {
|
||||
dv_handle = dlopen("/usr/local/lib/libbnxt_re.so", RTLD_NOW);
|
||||
if (!dv_handle) {
|
||||
DPRINTF("Could not open libbnxt_re.so. Returning\n");
|
||||
return ROCSHMEM_ERROR;
|
||||
}
|
||||
}
|
||||
return dv_handle;
|
||||
}
|
||||
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, init_obj);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, create_qp);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, destroy_qp);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, modify_qp);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, qp_mem_alloc);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, create_cq);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, destroy_cq);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, cq_mem_alloc);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, umem_reg);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, umem_dereg);
|
||||
DLSYM_HELPER(bnxtdv_ftable_, bnxt_re_dv_, bnxtdv_handle_, get_default_db_region);
|
||||
int GDABackend::bnxt_dv_dl_init() {
|
||||
bnxtdv_handle_ = bnxt_dv_dlopen();
|
||||
if (!bnxtdv_handle_)
|
||||
return ROCSHMEM_ERROR;
|
||||
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, init_obj);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, create_qp);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, destroy_qp);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, modify_qp);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, qp_mem_alloc);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, create_cq);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, destroy_cq);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, cq_mem_alloc);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, umem_reg);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, umem_dereg);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, get_default_db_region);
|
||||
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -83,4 +83,26 @@ struct bnxt_host_qp {
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
struct bnxtdv_funcs_t {
|
||||
int (*init_obj)(struct bnxt_re_dv_obj *obj, uint64_t obj_type);
|
||||
struct ibv_qp* (*create_qp)(struct ibv_pd *pd,
|
||||
struct bnxt_re_dv_qp_init_attr *qp_attr);
|
||||
int (*destroy_qp)(struct ibv_qp *ibvqp);
|
||||
int (*modify_qp)(struct ibv_qp *ibv_qp, struct ibv_qp_attr *attr,
|
||||
int attr_mask, uint32_t type, uint32_t value);
|
||||
int (*qp_mem_alloc)(struct ibv_pd *ibvpd,
|
||||
struct ibv_qp_init_attr *attr,
|
||||
struct bnxt_re_dv_qp_mem_info *dv_qp_mem);
|
||||
struct ibv_cq* (*create_cq)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_cq_init_attr *cq_attr);
|
||||
int (*destroy_cq)(struct ibv_cq *ibv_cq);
|
||||
void* (*cq_mem_alloc)(struct ibv_context *ibvctx, int num_cqe,
|
||||
struct bnxt_re_dv_cq_attr *cq_attr);
|
||||
void* (*umem_reg)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_umem_reg_attr *in);
|
||||
int (*umem_dereg)(void *umem_handle);
|
||||
int (*get_default_db_region)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_db_region_attr *out);
|
||||
};
|
||||
|
||||
#endif //LIBRARY_SRC_GDA_BNXT_GDA_PROVIDER_HPP_
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
###############################################################################
|
||||
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
backend_gda_ionic.cpp
|
||||
)
|
||||
|
||||
if(GDA_IONIC)
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
queue_pair_ionic.cpp
|
||||
)
|
||||
endif()
|
||||
@@ -0,0 +1,122 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "gda/backend_gda.hpp"
|
||||
#include "util.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
void GDABackend::ionic_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
ionic_dv_ctx dvctx;
|
||||
ionic_dv.get_ctx(&dvctx, context);
|
||||
|
||||
int hip_dev_id{-1};
|
||||
CHECK_HIP(hipGetDevice(&hip_dev_id));
|
||||
|
||||
void* gpu_db_page = nullptr;
|
||||
rocm_memory_lock_to_fine_grain(dvctx.db_page, 0x1000, &gpu_db_page, hip_dev_id);
|
||||
|
||||
uint64_t *db_page_u64 = reinterpret_cast<uint64_t*>(dvctx.db_page);
|
||||
uint64_t *gpu_db_page_u64 = reinterpret_cast<uint64_t*>(gpu_db_page);
|
||||
|
||||
uint64_t *gpu_db_ptr = &gpu_db_page_u64[dvctx.db_ptr - db_page_u64];
|
||||
|
||||
gpu_db_page = gpu_db_page;
|
||||
gpu_db_cq = &gpu_db_ptr[dvctx.cq_qtype];
|
||||
gpu_db_sq = &gpu_db_ptr[dvctx.sq_qtype];
|
||||
|
||||
uint8_t udma_idx = ionic_dv.qp_get_udma_idx(qps[conn_num]);
|
||||
|
||||
ionic_dv_cq dvcq;
|
||||
ionic_dv.get_cq(&dvcq, cqs[conn_num], udma_idx);
|
||||
|
||||
gpu_qp->cq_dbreg = gpu_db_cq;
|
||||
gpu_qp->cq_dbval = dvcq.q.db_val;
|
||||
gpu_qp->cq_mask = dvcq.q.mask;
|
||||
|
||||
gpu_qp->ionic_cq_buf = reinterpret_cast<ionic_v1_cqe*>(dvcq.q.ptr);
|
||||
|
||||
ionic_dv_qp dvqp;
|
||||
ionic_dv.get_qp(&dvqp, qps[conn_num]);
|
||||
|
||||
gpu_qp->sq_dbreg = gpu_db_sq;
|
||||
gpu_qp->sq_dbval = dvqp.sq.db_val;
|
||||
gpu_qp->sq_mask = dvqp.sq.mask;
|
||||
gpu_qp->ionic_sq_buf = reinterpret_cast<ionic_v1_wqe *>(dvqp.sq.ptr);
|
||||
|
||||
strncpy(gpu_qp->dev_name,
|
||||
qps[conn_num]->context->device->name,
|
||||
sizeof(gpu_qp->dev_name));
|
||||
gpu_qp->dev_name[sizeof(gpu_qp->dev_name) - 1] = 0;
|
||||
|
||||
gpu_qp->qp_num = qps[conn_num]->qp_num;
|
||||
gpu_qp->lkey = heap_mr->lkey;
|
||||
gpu_qp->rkey = heap_rkey[conn_num % num_pes];
|
||||
gpu_qp->inline_threshold = 32;
|
||||
}
|
||||
|
||||
void GDABackend::ionic_setup_parent_domain(struct ibv_parent_domain_init_attr* pattr) {
|
||||
ionic_dv.pd_set_sqcmb(pd_parent, false, false, false);
|
||||
ionic_dv.pd_set_rqcmb(pd_parent, false, false, false);
|
||||
|
||||
for (int uxdma_i = 0; uxdma_i < 2; ++uxdma_i) {
|
||||
pd_uxdma[uxdma_i] = ibv_alloc_parent_domain(context, pattr);
|
||||
CHECK_NNULL(pd_uxdma[uxdma_i], "ibv_alloc_parent_domain (uxdma)");
|
||||
|
||||
ionic_dv.pd_set_sqcmb(pd_uxdma[uxdma_i], false, false, false);
|
||||
ionic_dv.pd_set_rqcmb(pd_uxdma[uxdma_i], false, false, false);
|
||||
ionic_dv.pd_set_udma_mask(pd_uxdma[uxdma_i], 1u << uxdma_i);
|
||||
}
|
||||
}
|
||||
|
||||
void* GDABackend::ionic_dv_dlopen() {
|
||||
void* dv_handle{nullptr};
|
||||
dv_handle = dlopen("libionic.so", RTLD_NOW);
|
||||
if (!dv_handle) {
|
||||
// Try hard-coded PATH
|
||||
dv_handle = dlopen("/usr/local/lib/libionic.so", RTLD_NOW);
|
||||
if (!dv_handle) {
|
||||
DPRINTF("Could not open libionic.so. Returning\n");
|
||||
}
|
||||
}
|
||||
return dv_handle;
|
||||
}
|
||||
|
||||
int GDABackend::ionic_dv_dl_init() {
|
||||
ionicdv_handle_ = ionic_dv_dlopen();
|
||||
if (!ionicdv_handle_)
|
||||
return ROCSHMEM_ERROR;
|
||||
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, get_ctx);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, qp_get_udma_idx);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, get_cq);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, get_qp);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_sqcmb);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_rqcmb);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_udma_mask);
|
||||
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
@@ -0,0 +1,245 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef IONIC_DV_H
|
||||
#define IONIC_DV_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <infiniband/verbs.h>
|
||||
|
||||
struct ibv_cq;
|
||||
struct ibv_qp;
|
||||
|
||||
/** IONIC_PD_TAG - tag used for parent domain resource allocation. */
|
||||
#define IONIC_PD_TAG ((uint64_t)RDMA_DRIVER_IONIC << 32)
|
||||
#define IONIC_PD_TAG_CQ (IONIC_PD_TAG | 1)
|
||||
#define IONIC_PD_TAG_SQ (IONIC_PD_TAG | 2)
|
||||
#define IONIC_PD_TAG_RQ (IONIC_PD_TAG | 3)
|
||||
#define IONIC_PD_TAG_RCQ (IONIC_PD_TAG | 4)
|
||||
|
||||
/* deprecated */
|
||||
#define IONIC_SQ_SIG_ALL 1
|
||||
/* deprecated */
|
||||
#define IONIC_SQ_SIG_HACK_HIGH 2
|
||||
|
||||
/** IONIC_UDMA_MASK_LOW - flag represents the udma0 pipeline in the udma mask. */
|
||||
#define IONIC_UDMA_MASK_LOW 1
|
||||
/** IONIC_UDMA_MASK_HIGH - flag represents the udma1 pipeline in the udma mask. */
|
||||
#define IONIC_UDMA_MASK_HIGH 2
|
||||
|
||||
#define IONIC_DV_PUEC_NPORTS_MAX 8
|
||||
|
||||
/** struct ionic_dv_ctx - Context information for gpu-initiated rdma. */
|
||||
struct ionic_dv_ctx {
|
||||
void *db_page;
|
||||
uint64_t *db_ptr;
|
||||
uint8_t sq_qtype;
|
||||
uint8_t rq_qtype;
|
||||
uint8_t cq_qtype;
|
||||
};
|
||||
|
||||
/** struct ionic_dv_ctx - Queue information for gpu-initiated rdma. */
|
||||
struct ionic_dv_queue {
|
||||
void *ptr;
|
||||
size_t size;
|
||||
uint64_t db_val;
|
||||
uint16_t mask;
|
||||
uint8_t depth_log2;
|
||||
uint8_t stride_log2;
|
||||
};
|
||||
|
||||
/** struct ionic_dv_ctx - CQ information for gpu-initiated rdma. */
|
||||
struct ionic_dv_cq {
|
||||
struct ionic_dv_queue q;
|
||||
};
|
||||
|
||||
/** struct ionic_dv_ctx - QP information for gpu-initiated rdma. */
|
||||
struct ionic_dv_qp {
|
||||
struct ionic_dv_queue rq;
|
||||
struct ionic_dv_queue sq;
|
||||
};
|
||||
|
||||
/** struct ionic_puec_route - Info needed to setup a PUEC plane route. */
|
||||
struct ionic_dv_puec_route {
|
||||
union ibv_gid dgid;
|
||||
union ibv_gid sgid;
|
||||
uint32_t flow_label;
|
||||
uint8_t hop_limit;
|
||||
uint8_t traffic_class;
|
||||
uint8_t sl;
|
||||
uint8_t rsvd[5];
|
||||
uint32_t flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* ionic_dv_is_ionic_ctx - Test if context belongs to ionic provider.
|
||||
*/
|
||||
bool ionic_dv_is_ionic_ctx(struct ibv_context *ibctx);
|
||||
|
||||
/**
|
||||
* ionic_dv_is_ionic_pd - Test if pd belongs to ionic provider.
|
||||
*/
|
||||
bool ionic_dv_is_ionic_pd(struct ibv_pd *ibpd);
|
||||
|
||||
/**
|
||||
* ionic_dv_is_ionic_cq - Test if cq belongs to ionic provider.
|
||||
*/
|
||||
bool ionic_dv_is_ionic_cq(struct ibv_cq *ibcq);
|
||||
|
||||
/**
|
||||
* ionic_dv_is_ionic_qp - Test if qp belongs to ionic provider.
|
||||
*/
|
||||
bool ionic_dv_is_ionic_qp(struct ibv_qp *ibqp);
|
||||
|
||||
|
||||
/**
|
||||
* ionic_dv_ctx_get_udma_count - Get number of udma pipelines.
|
||||
*/
|
||||
uint8_t ionic_dv_ctx_get_udma_count(struct ibv_context *ibctx);
|
||||
|
||||
/**
|
||||
* ionic_dv_ctx_get_udma_mask - Get mask of udma pipeline ids.
|
||||
*/
|
||||
uint8_t ionic_dv_ctx_get_udma_mask(struct ibv_context *ibctx);
|
||||
|
||||
/**
|
||||
* ionic_dv_pd_get_udma_mask - Get mask of udma pipeline ids of pd or parent domain.
|
||||
*/
|
||||
uint8_t ionic_dv_pd_get_udma_mask(struct ibv_pd *ibpd);
|
||||
|
||||
/**
|
||||
* ionic_dv_pd_set_udma_mask - Restrict pipeline ids of pd or parent domain.
|
||||
*
|
||||
* Queues associated with this pd will be restricted to one of the pipelines enabled by
|
||||
* the mask at the time of queue creation.
|
||||
*
|
||||
* Recommended usage is to create a pd, then parent domains of that pd for each different
|
||||
* udma mask. Set the desired udma mask on each parent domain. Then, create queues
|
||||
* associated with the parent domain with the desired udma mask.
|
||||
*
|
||||
* Alternative usage is to create a pd, and set the desired udma mask prior to creating
|
||||
* each queue. Changing the udma mask of the pd has no effect on previously created
|
||||
* queues.
|
||||
*/
|
||||
int ionic_dv_pd_set_udma_mask(struct ibv_pd *ibpd, uint8_t udma_mask);
|
||||
|
||||
/**
|
||||
* ionic_dv_cq_get_udma_mask - Get mask of udma pipeline ids of completion queueue.
|
||||
*/
|
||||
uint8_t ionic_dv_cq_get_udma_mask(struct ibv_cq *ibcq);
|
||||
|
||||
/**
|
||||
* ionic_dv_qp_get_udma_idx - Get udma pipeline id of queueue pair.
|
||||
*/
|
||||
uint8_t ionic_dv_qp_get_udma_idx(struct ibv_qp *ibqp);
|
||||
|
||||
|
||||
/**
|
||||
* ionic_dv_pd_set_sqcmb - Specify send queue preference for controller memory bar.
|
||||
*
|
||||
* Send queues associated with this pd will use the controller memory bar according to
|
||||
* this preference at the time of queue creation.
|
||||
*
|
||||
* @enable - Allow the use of the controller memory bar.
|
||||
* @expdb - Allow the use of express doorbell optimizations.
|
||||
* @require - Require preferences to be met, no fallback.
|
||||
*/
|
||||
int ionic_dv_pd_set_sqcmb(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
|
||||
/**
|
||||
* ionic_dv_pd_set_rqcmb - Specify receive queue preference for controller memory bar.
|
||||
*
|
||||
* See ionic_dv_pd_set_sqcmb().
|
||||
*/
|
||||
int ionic_dv_pd_set_rqcmb(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
|
||||
|
||||
/**
|
||||
* ionic_dv_qp_set_gda - Enable or disable GPU-Direct Async (GDA) mode.
|
||||
*
|
||||
* In GDA mode, when the application calls ibv_post_send() or ibv_post_recv(), the
|
||||
* provider writes WQEs in the descriptor ring without ringing the doorbell.
|
||||
*
|
||||
* To ring the doorbell, after posting the work the application should query to get the
|
||||
* doorbell data, and later write that data to the memory mapped doorbell register.
|
||||
*
|
||||
* See also: ionic_dv_get_ctx()
|
||||
* See also: ionic_dv_qp_get_send_dbell_data()
|
||||
* See also: ionic_dv_qp_get_recv_dbell_data()
|
||||
*
|
||||
* @ibqp - Set GDA mode for this queue pair.
|
||||
* @enable_send - Enable GDA mode for the send queue.
|
||||
* @enable_recv - Enable GDA mode for the recv queue.
|
||||
*/
|
||||
int ionic_dv_qp_set_gda(struct ibv_qp *ibqp, bool enable_send, bool enable_recv);
|
||||
|
||||
/**
|
||||
* ionic_dv_qp_get_send_dbell_data - Get send queue doorbell data.
|
||||
*
|
||||
* In GDA mode, when the application calls ibv_post_send() the provider writes WQEs in
|
||||
* the descriptor ring without ringing the doorbell. The application should query the
|
||||
* doorbell data immediately after posting the work. The application requests the
|
||||
* GPU to fill the source buffers of the data transfer with the result of computation.
|
||||
* The application requests the GPU to write the doorbell data to the memory mapped
|
||||
* doorbell register immediately when the computation is complete, triggering the data
|
||||
* transfer.
|
||||
*
|
||||
* It is important that the GPU ring the doorbell in sequential order. If work requests
|
||||
* are posted in batches A, B, and C, with respective doorbell data, the data path must
|
||||
* not write B or C before A, and must not write C before B. It is ok to skip writing a
|
||||
* doorbell, like writing only C, which will trigger the data transfer for all of the
|
||||
* work up to that point in the sequence.
|
||||
*
|
||||
* @ibqp - Get send doorbell data for this queue pair.
|
||||
* @dbdata - Output parameter for doorbell data.
|
||||
*/
|
||||
int ionic_dv_qp_get_send_dbell_data(struct ibv_qp *ibqp, uint64_t *dbdata);
|
||||
|
||||
/**
|
||||
* ionic_dv_qp_get_recv_dbell_data - Get recv queue doorbell data.
|
||||
*
|
||||
* In GDA mode, when the application calls ibv_post_recv() the provider writes WQEs in
|
||||
* the descriptor ring without ringing the doorbell. After polling recv completions, the
|
||||
* application can immediately re-post the receive buffers without ringing the doorbell.
|
||||
* The application should query the doorbell data immediately after posting the buffers.
|
||||
* The application requests the GPU consume the data from the receive buffers. The
|
||||
* application requests the GPU to write the doorbell data to the memory mapped doorbell
|
||||
* register immediately after the received data is consumed, making the buffers available
|
||||
* for the next data transfer.
|
||||
*
|
||||
* It is important that the GPU ring the doorbell in sequential order. If work requests
|
||||
* are posted in batches A, B, and C, with respective doorbell data, the data path must
|
||||
* not write B or C before A, and must not write C before B. It is ok to skip writing a
|
||||
* doorbell, like writing only C, which will make buffers available up to that point in
|
||||
* the sequence.
|
||||
*
|
||||
* @ibqp - Get recv doorbell data for this queue pair.
|
||||
* @dbdata - Output parameter for doorbell data.
|
||||
*/
|
||||
int ionic_dv_qp_get_recv_dbell_data(struct ibv_qp *ibqp, uint64_t *dbdata);
|
||||
|
||||
|
||||
/**
|
||||
* ionic_dv_get_ctx - Extract context information for gpu-initiated rdma.
|
||||
*/
|
||||
int ionic_dv_get_ctx(struct ionic_dv_ctx *dvctx, struct ibv_context *ibctx);
|
||||
|
||||
/**
|
||||
* ionic_dv_get_cq - Extract cq information for gpu-initiated rdma.
|
||||
*/
|
||||
int ionic_dv_get_cq(struct ionic_dv_cq *dvcq, struct ibv_cq *ibcq, uint8_t udma_idx);
|
||||
|
||||
/**
|
||||
* ionic_dv_get_qp - Extract qp information for gpu-initiated rdma.
|
||||
*/
|
||||
int ionic_dv_get_qp(struct ionic_dv_qp *dvqp, struct ibv_qp *ibqp);
|
||||
|
||||
/**
|
||||
* ionic_dv_qp_set_puec_plane_route - set route info for a PUEC plane.
|
||||
*/
|
||||
int ionic_dv_qp_set_puec_plane_route(struct ibv_qp *ibqp, uint8_t plane_idx,
|
||||
struct ionic_dv_puec_route *ipr);
|
||||
|
||||
#endif /* IONIC_DV_H */
|
||||
@@ -0,0 +1,534 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
|
||||
/*
|
||||
* Copyright (c) 2018-2022 Pensando Systems, Inc. All rights reserved.
|
||||
* 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef IONIC_FW_H
|
||||
#define IONIC_FW_H
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
#include <util/util.h>
|
||||
#else
|
||||
#ifndef BIT
|
||||
#define BIT(n) (1u << (n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define IONIC_EXP_DBELL_SZ 8
|
||||
|
||||
/* common to all versions */
|
||||
|
||||
/* wqe scatter gather element */
|
||||
struct ionic_sge {
|
||||
__be64 va;
|
||||
__be32 len;
|
||||
__be32 lkey;
|
||||
};
|
||||
|
||||
/* admin queue mr type */
|
||||
enum ionic_mr_flags {
|
||||
/* bits that determine mr access */
|
||||
IONIC_MRF_LOCAL_WRITE = BIT(0),
|
||||
IONIC_MRF_REMOTE_WRITE = BIT(1),
|
||||
IONIC_MRF_REMOTE_READ = BIT(2),
|
||||
IONIC_MRF_REMOTE_ATOMIC = BIT(3),
|
||||
IONIC_MRF_MW_BIND = BIT(4),
|
||||
IONIC_MRF_ZERO_BASED = BIT(5),
|
||||
IONIC_MRF_ON_DEMAND = BIT(6),
|
||||
IONIC_MRF_PB = BIT(7),
|
||||
IONIC_MRF_ACCESS_MASK = BIT(12) - 1,
|
||||
|
||||
/* bits that determine mr type */
|
||||
IONIC_MRF_IS_MW = BIT(14),
|
||||
IONIC_MRF_INV_EN = BIT(15),
|
||||
|
||||
/* base flags combinations for mr types */
|
||||
IONIC_MRF_USER_MR = 0,
|
||||
IONIC_MRF_PHYS_MR = IONIC_MRF_INV_EN,
|
||||
IONIC_MRF_MW_1 = IONIC_MRF_IS_MW,
|
||||
IONIC_MRF_MW_2 = IONIC_MRF_IS_MW | IONIC_MRF_INV_EN,
|
||||
};
|
||||
|
||||
static inline int to_ionic_mr_flags(int access)
|
||||
{
|
||||
int flags = 0;
|
||||
|
||||
if (access & IBV_ACCESS_LOCAL_WRITE)
|
||||
flags |= IONIC_MRF_LOCAL_WRITE;
|
||||
|
||||
if (access & IBV_ACCESS_REMOTE_READ)
|
||||
flags |= IONIC_MRF_REMOTE_READ;
|
||||
|
||||
if (access & IBV_ACCESS_REMOTE_WRITE)
|
||||
flags |= IONIC_MRF_REMOTE_WRITE;
|
||||
|
||||
if (access & IBV_ACCESS_REMOTE_ATOMIC)
|
||||
flags |= IONIC_MRF_REMOTE_ATOMIC;
|
||||
|
||||
if (access & IBV_ACCESS_MW_BIND)
|
||||
flags |= IONIC_MRF_MW_BIND;
|
||||
|
||||
if (access & IBV_ACCESS_ZERO_BASED)
|
||||
flags |= IONIC_MRF_ZERO_BASED;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/* cqe status indicated in status_length field when err bit is set */
|
||||
enum ionic_status {
|
||||
IONIC_STS_OK,
|
||||
IONIC_STS_LOCAL_LEN_ERR,
|
||||
IONIC_STS_LOCAL_QP_OPER_ERR,
|
||||
IONIC_STS_LOCAL_PROT_ERR,
|
||||
IONIC_STS_WQE_FLUSHED_ERR,
|
||||
IONIC_STS_MEM_MGMT_OPER_ERR,
|
||||
IONIC_STS_BAD_RESP_ERR,
|
||||
IONIC_STS_LOCAL_ACC_ERR,
|
||||
IONIC_STS_REMOTE_INV_REQ_ERR,
|
||||
IONIC_STS_REMOTE_ACC_ERR,
|
||||
IONIC_STS_REMOTE_OPER_ERR,
|
||||
IONIC_STS_RETRY_EXCEEDED,
|
||||
IONIC_STS_RNR_RETRY_EXCEEDED,
|
||||
IONIC_STS_XRC_VIO_ERR,
|
||||
};
|
||||
|
||||
static inline int ionic_to_ibv_status(int sts)
|
||||
{
|
||||
switch (sts) {
|
||||
case IONIC_STS_OK:
|
||||
return IBV_WC_SUCCESS;
|
||||
case IONIC_STS_LOCAL_LEN_ERR:
|
||||
return IBV_WC_LOC_LEN_ERR;
|
||||
case IONIC_STS_LOCAL_QP_OPER_ERR:
|
||||
return IBV_WC_LOC_QP_OP_ERR;
|
||||
case IONIC_STS_LOCAL_PROT_ERR:
|
||||
return IBV_WC_LOC_PROT_ERR;
|
||||
case IONIC_STS_WQE_FLUSHED_ERR:
|
||||
return IBV_WC_WR_FLUSH_ERR;
|
||||
case IONIC_STS_MEM_MGMT_OPER_ERR:
|
||||
return IBV_WC_MW_BIND_ERR;
|
||||
case IONIC_STS_BAD_RESP_ERR:
|
||||
return IBV_WC_BAD_RESP_ERR;
|
||||
case IONIC_STS_LOCAL_ACC_ERR:
|
||||
return IBV_WC_LOC_ACCESS_ERR;
|
||||
case IONIC_STS_REMOTE_INV_REQ_ERR:
|
||||
return IBV_WC_REM_INV_REQ_ERR;
|
||||
case IONIC_STS_REMOTE_ACC_ERR:
|
||||
return IBV_WC_REM_ACCESS_ERR;
|
||||
case IONIC_STS_REMOTE_OPER_ERR:
|
||||
return IBV_WC_REM_OP_ERR;
|
||||
case IONIC_STS_RETRY_EXCEEDED:
|
||||
return IBV_WC_RETRY_EXC_ERR;
|
||||
case IONIC_STS_RNR_RETRY_EXCEEDED:
|
||||
return IBV_WC_RNR_RETRY_EXC_ERR;
|
||||
case IONIC_STS_XRC_VIO_ERR:
|
||||
default:
|
||||
return IBV_WC_GENERAL_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
/* fw abi v1 */
|
||||
|
||||
/* data payload part of v1 wqe */
|
||||
union ionic_v1_pld {
|
||||
struct ionic_sge sgl[2];
|
||||
__be32 spec32[8];
|
||||
__be16 spec16[16];
|
||||
__u8 data[32];
|
||||
};
|
||||
|
||||
/* completion queue v1 cqe */
|
||||
struct ionic_v1_cqe {
|
||||
union {
|
||||
struct {
|
||||
__le64 wqe_idx_timestamp;
|
||||
__be32 src_qpn_op;
|
||||
__u8 src_mac[6];
|
||||
__be16 vlan_tag;
|
||||
__be32 imm_data_rkey;
|
||||
} recv;
|
||||
struct {
|
||||
__u8 rsvd[4];
|
||||
__be32 msg_msn;
|
||||
__u8 rsvd2[8];
|
||||
__le64 npg_wqe_idx_timestamp;
|
||||
} send;
|
||||
};
|
||||
__be32 status_length;
|
||||
__be32 qid_type_flags;
|
||||
};
|
||||
|
||||
/* bits for cqe wqe_idx and timestamp */
|
||||
enum ionic_v1_cqe_wqe_idx_timestamp_bits {
|
||||
IONIC_V1_CQE_WQE_IDX_MASK = 0xffff,
|
||||
IONIC_V1_CQE_TIMESTAMP_SHIFT = 16,
|
||||
};
|
||||
|
||||
/* bits for cqe recv */
|
||||
enum ionic_v1_cqe_src_qpn_bits {
|
||||
IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff,
|
||||
IONIC_V1_CQE_RECV_OP_SHIFT = 24,
|
||||
|
||||
/* MASK could be 0x3, but need 0x1f for makeshift values:
|
||||
* OP_TYPE_RDMA_OPER_WITH_IMM, OP_TYPE_SEND_RCVD
|
||||
*/
|
||||
IONIC_V1_CQE_RECV_OP_MASK = 0x1f,
|
||||
IONIC_V1_CQE_RECV_OP_SEND = 0,
|
||||
IONIC_V1_CQE_RECV_OP_SEND_INV = 1,
|
||||
IONIC_V1_CQE_RECV_OP_SEND_IMM = 2,
|
||||
IONIC_V1_CQE_RECV_OP_RDMA_IMM = 3,
|
||||
|
||||
IONIC_V1_CQE_RECV_IS_IPV4 = BIT(7 + IONIC_V1_CQE_RECV_OP_SHIFT),
|
||||
IONIC_V1_CQE_RECV_IS_VLAN = BIT(6 + IONIC_V1_CQE_RECV_OP_SHIFT),
|
||||
};
|
||||
|
||||
/* bits for cqe qid_type_flags */
|
||||
enum ionic_v1_cqe_qtf_bits {
|
||||
IONIC_V1_CQE_COLOR = BIT(0),
|
||||
IONIC_V1_CQE_ERROR = BIT(1),
|
||||
IONIC_V1_CQE_TYPE_SHIFT = 5,
|
||||
IONIC_V1_CQE_TYPE_MASK = 0x7,
|
||||
IONIC_V1_CQE_QID_SHIFT = 8,
|
||||
|
||||
IONIC_V1_CQE_TYPE_RECV = 1,
|
||||
IONIC_V1_CQE_TYPE_SEND_MSN = 2,
|
||||
IONIC_V1_CQE_TYPE_SEND_NPG = 3,
|
||||
IONIC_V1_CQE_TYPE_RECV_INDIR = 4,
|
||||
};
|
||||
|
||||
#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_HCC__)
|
||||
|
||||
static inline bool ionic_v1_cqe_color(struct ionic_v1_cqe *cqe)
|
||||
{
|
||||
return !!(cqe->qid_type_flags & htobe32(IONIC_V1_CQE_COLOR));
|
||||
}
|
||||
|
||||
static inline bool ionic_v1_cqe_error(struct ionic_v1_cqe *cqe)
|
||||
{
|
||||
return !!(cqe->qid_type_flags & htobe32(IONIC_V1_CQE_ERROR));
|
||||
}
|
||||
|
||||
static inline bool ionic_v1_cqe_recv_is_ipv4(struct ionic_v1_cqe *cqe)
|
||||
{
|
||||
return !!(cqe->recv.src_qpn_op &
|
||||
htobe32(IONIC_V1_CQE_RECV_IS_IPV4));
|
||||
}
|
||||
|
||||
static inline bool ionic_v1_cqe_recv_is_vlan(struct ionic_v1_cqe *cqe)
|
||||
{
|
||||
return !!(cqe->recv.src_qpn_op &
|
||||
htobe32(IONIC_V1_CQE_RECV_IS_VLAN));
|
||||
}
|
||||
|
||||
static inline void ionic_v1_cqe_clean(struct ionic_v1_cqe *cqe)
|
||||
{
|
||||
cqe->qid_type_flags |= htobe32(~0u << IONIC_V1_CQE_QID_SHIFT);
|
||||
}
|
||||
|
||||
static inline uint32_t ionic_v1_cqe_qtf(struct ionic_v1_cqe *cqe)
|
||||
{
|
||||
return be32toh(cqe->qid_type_flags);
|
||||
}
|
||||
|
||||
#endif // !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_HCC__)
|
||||
|
||||
static inline uint8_t ionic_v1_cqe_qtf_type(uint32_t qtf)
|
||||
{
|
||||
return (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
|
||||
}
|
||||
|
||||
static inline uint32_t ionic_v1_cqe_qtf_qid(uint32_t qtf)
|
||||
{
|
||||
return qtf >> IONIC_V1_CQE_QID_SHIFT;
|
||||
}
|
||||
|
||||
/* v1 base wqe header */
|
||||
struct ionic_v1_base_hdr {
|
||||
__le64 wqe_idx;
|
||||
__u8 op;
|
||||
__u8 num_sge_key;
|
||||
__be16 flags;
|
||||
__be32 imm_data_key;
|
||||
};
|
||||
|
||||
/* v1 receive wqe body */
|
||||
struct ionic_v1_recv_bdy {
|
||||
__u8 rsvd[16];
|
||||
union ionic_v1_pld pld;
|
||||
};
|
||||
|
||||
/* v1 send/rdma wqe body (common, has sgl) */
|
||||
struct ionic_v1_common_bdy {
|
||||
union {
|
||||
struct {
|
||||
__be32 ah_id;
|
||||
__be32 dest_qpn;
|
||||
__be32 dest_qkey;
|
||||
} send;
|
||||
struct {
|
||||
__be32 remote_va_high;
|
||||
__be32 remote_va_low;
|
||||
__be32 remote_rkey;
|
||||
} rdma;
|
||||
};
|
||||
__be32 length;
|
||||
union ionic_v1_pld pld;
|
||||
};
|
||||
|
||||
/* v1 atomic wqe body */
|
||||
struct ionic_v1_atomic_bdy {
|
||||
__be32 remote_va_high;
|
||||
__be32 remote_va_low;
|
||||
__be32 remote_rkey;
|
||||
__be32 swap_add_high;
|
||||
__be32 swap_add_low;
|
||||
__be32 compare_high;
|
||||
__be32 compare_low;
|
||||
__u8 rsvd[4];
|
||||
struct ionic_sge sge;
|
||||
};
|
||||
|
||||
/* v2 atomic wqe body */
|
||||
struct ionic_v2_atomic_bdy {
|
||||
__be32 remote_va_high;
|
||||
__be32 remote_va_low;
|
||||
__be32 remote_rkey;
|
||||
__be32 swap_add_high;
|
||||
__be32 swap_add_low;
|
||||
__be32 compare_high;
|
||||
__be32 compare_low;
|
||||
__be32 lkey;
|
||||
__be64 local_va;
|
||||
__u8 rsvd_expdb[8];
|
||||
};
|
||||
|
||||
/* v1 bind mw wqe body */
|
||||
struct ionic_v1_bind_mw_bdy {
|
||||
__be64 va;
|
||||
__be64 length;
|
||||
__be32 lkey;
|
||||
__be16 flags;
|
||||
__u8 rsvd[26];
|
||||
};
|
||||
|
||||
/* v1 send/recv wqe */
|
||||
struct ionic_v1_wqe {
|
||||
struct ionic_v1_base_hdr base;
|
||||
union {
|
||||
struct ionic_v1_recv_bdy recv;
|
||||
struct ionic_v1_common_bdy common;
|
||||
struct ionic_v1_atomic_bdy atomic;
|
||||
struct ionic_v2_atomic_bdy atomic_v2;
|
||||
struct ionic_v1_bind_mw_bdy bind_mw;
|
||||
};
|
||||
};
|
||||
|
||||
/* queue pair v1 send opcodes */
|
||||
enum ionic_v1_op {
|
||||
IONIC_V1_OP_SEND,
|
||||
IONIC_V1_OP_SEND_INV,
|
||||
IONIC_V1_OP_SEND_IMM,
|
||||
IONIC_V1_OP_RDMA_READ,
|
||||
IONIC_V1_OP_RDMA_WRITE,
|
||||
IONIC_V1_OP_RDMA_WRITE_IMM,
|
||||
IONIC_V1_OP_ATOMIC_CS,
|
||||
IONIC_V1_OP_ATOMIC_FA,
|
||||
IONIC_V1_OP_REG_MR,
|
||||
IONIC_V1_OP_LOCAL_INV,
|
||||
IONIC_V1_OP_BIND_MW,
|
||||
|
||||
/* flags */
|
||||
IONIC_V1_FLAG_FENCE = BIT(0),
|
||||
IONIC_V1_FLAG_SOL = BIT(1),
|
||||
IONIC_V1_FLAG_INL = BIT(2),
|
||||
IONIC_V1_FLAG_SIG = BIT(3),
|
||||
IONIC_V1_FLAG_COLOR = BIT(4),
|
||||
|
||||
/* flags last four bits for sgl spec format */
|
||||
IONIC_V1_FLAG_SPEC32 = (1u << 12),
|
||||
IONIC_V1_FLAG_SPEC16 = (2u << 12),
|
||||
IONIC_V1_SPEC_FIRST_SGE = 2,
|
||||
};
|
||||
|
||||
/* queue pair v2 send opcodes */
|
||||
enum ionic_v2_op {
|
||||
IONIC_V2_OPSL_OUT = 0x20,
|
||||
IONIC_V2_OPSL_IMM = 0x40,
|
||||
IONIC_V2_OPSL_INV = 0x80,
|
||||
|
||||
IONIC_V2_OP_SEND = 0x0 | IONIC_V2_OPSL_OUT,
|
||||
IONIC_V2_OP_SEND_IMM = IONIC_V2_OP_SEND | IONIC_V2_OPSL_IMM,
|
||||
IONIC_V2_OP_SEND_INV = IONIC_V2_OP_SEND | IONIC_V2_OPSL_INV,
|
||||
|
||||
IONIC_V2_OP_RDMA_WRITE = 0x1 | IONIC_V2_OPSL_OUT,
|
||||
IONIC_V2_OP_RDMA_WRITE_IMM = IONIC_V2_OP_RDMA_WRITE | IONIC_V2_OPSL_IMM,
|
||||
|
||||
IONIC_V2_OP_RDMA_READ = 0x2,
|
||||
|
||||
IONIC_V2_OP_ATOMIC_CS = 0x4,
|
||||
IONIC_V2_OP_ATOMIC_FA = 0x5,
|
||||
IONIC_V2_OP_REG_MR = 0x6,
|
||||
IONIC_V2_OP_LOCAL_INV = 0x7,
|
||||
IONIC_V2_OP_BIND_MW = 0x8,
|
||||
};
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
|
||||
static inline size_t ionic_v1_send_wqe_min_size(int min_sge, int min_data,
|
||||
int spec, bool expdb)
|
||||
{
|
||||
size_t sz_wqe, sz_sgl, sz_data;
|
||||
|
||||
if (spec > IONIC_V1_SPEC_FIRST_SGE)
|
||||
min_sge += IONIC_V1_SPEC_FIRST_SGE;
|
||||
|
||||
if (expdb) {
|
||||
min_sge += 1;
|
||||
min_data += IONIC_EXP_DBELL_SZ;
|
||||
}
|
||||
|
||||
sz_wqe = sizeof(struct ionic_v1_wqe);
|
||||
sz_sgl = offsetof(struct ionic_v1_wqe, common.pld.sgl[min_sge]);
|
||||
sz_data = offsetof(struct ionic_v1_wqe, common.pld.data[min_data]);
|
||||
|
||||
if (sz_sgl > sz_wqe)
|
||||
sz_wqe = sz_sgl;
|
||||
|
||||
if (sz_data > sz_wqe)
|
||||
sz_wqe = sz_data;
|
||||
|
||||
return roundup_pow_of_two(sz_wqe);
|
||||
}
|
||||
|
||||
static inline int ionic_v1_send_wqe_max_sge(uint8_t stride_log2, int spec, bool expdb)
|
||||
{
|
||||
struct ionic_v1_wqe *wqe = (void *)0;
|
||||
struct ionic_sge *sge = (void *)(1ull << stride_log2);
|
||||
int num_sge = 0;
|
||||
|
||||
if (expdb)
|
||||
sge -= 1;
|
||||
|
||||
if (spec > IONIC_V1_SPEC_FIRST_SGE)
|
||||
num_sge = IONIC_V1_SPEC_FIRST_SGE;
|
||||
|
||||
num_sge = sge - &wqe->common.pld.sgl[num_sge];
|
||||
|
||||
if (spec && num_sge > spec)
|
||||
num_sge = spec;
|
||||
|
||||
return num_sge;
|
||||
}
|
||||
|
||||
static inline int ionic_v1_send_wqe_max_data(uint8_t stride_log2, bool expdb)
|
||||
{
|
||||
struct ionic_v1_wqe *wqe = (void *)0;
|
||||
__u8 *data = (void *)(1ull << stride_log2);
|
||||
|
||||
if (expdb)
|
||||
data -= IONIC_EXP_DBELL_SZ;
|
||||
|
||||
return data - wqe->common.pld.data;
|
||||
}
|
||||
|
||||
static inline size_t ionic_v1_recv_wqe_min_size(int min_sge, int spec, bool expdb)
|
||||
{
|
||||
size_t sz_wqe, sz_sgl;
|
||||
|
||||
if (spec > IONIC_V1_SPEC_FIRST_SGE)
|
||||
min_sge += IONIC_V1_SPEC_FIRST_SGE;
|
||||
|
||||
if (expdb)
|
||||
min_sge += 1;
|
||||
|
||||
sz_wqe = sizeof(struct ionic_v1_wqe);
|
||||
sz_sgl = offsetof(struct ionic_v1_wqe, recv.pld.sgl[min_sge]);
|
||||
|
||||
if (sz_sgl > sz_wqe)
|
||||
sz_wqe = sz_sgl;
|
||||
|
||||
return sz_wqe;
|
||||
}
|
||||
|
||||
static inline int ionic_v1_recv_wqe_max_sge(uint8_t stride_log2, int spec, bool expdb)
|
||||
{
|
||||
struct ionic_v1_wqe *wqe = (void *)0;
|
||||
struct ionic_sge *sge = (void *)(1ull << stride_log2);
|
||||
int num_sge = 0;
|
||||
|
||||
if (expdb)
|
||||
sge -= 1;
|
||||
|
||||
if (spec > IONIC_V1_SPEC_FIRST_SGE)
|
||||
num_sge = IONIC_V1_SPEC_FIRST_SGE;
|
||||
|
||||
num_sge = sge - &wqe->recv.pld.sgl[num_sge];
|
||||
|
||||
if (spec && num_sge > spec)
|
||||
num_sge = spec;
|
||||
|
||||
return num_sge;
|
||||
}
|
||||
|
||||
static inline int ionic_v1_use_spec_sge(int min_sge, int spec)
|
||||
{
|
||||
if (!spec || min_sge > spec)
|
||||
return 0;
|
||||
|
||||
if (min_sge <= IONIC_V1_SPEC_FIRST_SGE)
|
||||
return IONIC_V1_SPEC_FIRST_SGE;
|
||||
|
||||
return spec;
|
||||
}
|
||||
|
||||
#define IONIC_RCQ_SIZE 4096
|
||||
#define IONIC_RCQ_DEPTH 128
|
||||
#define IONIC_RCQ_DEPTH_LOG2 7
|
||||
#define IONIC_RCQ_STRIDE_LOG2 4
|
||||
|
||||
struct ionic_rcq_hdr {
|
||||
uint8_t pad[60];
|
||||
uint32_t seq_pad;
|
||||
};
|
||||
|
||||
struct ionic_rcqe {
|
||||
uint32_t status_length;
|
||||
uint32_t imm_data;
|
||||
uint32_t seq_flags;
|
||||
uint32_t rsvd;
|
||||
};
|
||||
|
||||
enum ionic_rcqe_flag {
|
||||
IONIC_RCQE_C = BIT(7),
|
||||
IONIC_RCQE_I = BIT(6),
|
||||
};
|
||||
|
||||
struct ionic_rcq {
|
||||
struct ionic_rcq_hdr hdr;
|
||||
struct ionic_rcqe ring[IONIC_RCQ_DEPTH];
|
||||
};
|
||||
|
||||
static inline uint32_t ionic_rcq_hdr_seq(struct ionic_rcq_hdr *hdr)
|
||||
{
|
||||
return be32toh(hdr->seq_pad) >> 8;
|
||||
}
|
||||
|
||||
static inline uint32_t ionic_rcqe_seq(struct ionic_rcqe *rcqe)
|
||||
{
|
||||
return be32toh(rcqe->seq_flags) >> 8;
|
||||
}
|
||||
|
||||
static inline bool ionic_rcqe_color(struct ionic_rcqe *rcqe)
|
||||
{
|
||||
return !!(rcqe->seq_flags & htobe32(IONIC_RCQE_C));
|
||||
}
|
||||
|
||||
static inline bool ionic_rcqe_imm(struct ionic_rcqe *rcqe)
|
||||
{
|
||||
return !!(rcqe->seq_flags & htobe32(IONIC_RCQE_I));
|
||||
}
|
||||
|
||||
#endif // !defined(__cplusplus)
|
||||
|
||||
#endif /* IONIC_FW_H */
|
||||
@@ -25,11 +25,19 @@
|
||||
#ifndef LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
|
||||
#define LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
|
||||
|
||||
#ifdef GDA_IONIC
|
||||
extern "C" {
|
||||
#include <infiniband/ionic_dv.h>
|
||||
#include <infiniband/ionic_fw.h>
|
||||
#include "gda/ionic/ionic_dv.h"
|
||||
#include "gda/ionic/ionic_fw.h"
|
||||
}
|
||||
#endif
|
||||
|
||||
struct ionicdv_funcs_t {
|
||||
int (*get_ctx)(struct ionic_dv_ctx *dvctx, struct ibv_context *ibctx);
|
||||
uint8_t (*qp_get_udma_idx)(struct ibv_qp *ibqp);
|
||||
int (*get_cq)(struct ionic_dv_cq *dvcq, struct ibv_cq *ibcq, uint8_t udma_idx);
|
||||
int (*get_qp)(struct ionic_dv_qp *dvqp, struct ibv_qp *ibqp);
|
||||
int (*pd_set_sqcmb)(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
int (*pd_set_rqcmb)(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
int (*pd_set_udma_mask)(struct ibv_pd *ibpd, uint8_t udma_mask);
|
||||
};
|
||||
|
||||
#endif //LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
|
||||
|
||||
@@ -0,0 +1,312 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "gda/queue_pair.hpp"
|
||||
#include "gda/endian.hpp"
|
||||
#include "util.hpp"
|
||||
#include "containers/free_list_impl.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
__device__ uint64_t QueuePair::get_same_qp_lane_mask() {
|
||||
uint64_t lane_mask = get_active_lane_mask();
|
||||
uintptr_t this_val = reinterpret_cast<uintptr_t>(this);
|
||||
|
||||
// exclude threads operating on a different qp from this thread lane mask
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
uint64_t bit_i = 1ull << i;
|
||||
if ((lane_mask & bit_i) && __shfl(this_val, i) != this_val) {
|
||||
lane_mask &= ~bit_i;
|
||||
}
|
||||
}
|
||||
|
||||
return lane_mask;
|
||||
}
|
||||
|
||||
__device__ uint32_t QueuePair::reserve_sq(uint64_t activemask, uint32_t num_wqes) {
|
||||
uint32_t my_sq_prod = 0;
|
||||
|
||||
// reserve space for wqes in sq
|
||||
if (is_first_active_lane(activemask)) {
|
||||
my_sq_prod = __hip_atomic_fetch_add(&sq_prod, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
my_sq_prod = __shfl(my_sq_prod, get_first_active_lane_id(activemask));
|
||||
|
||||
// wait for that space to be available
|
||||
ionic_quiet_internal(activemask, my_sq_prod + num_wqes - sq_mask);
|
||||
|
||||
return my_sq_prod;
|
||||
}
|
||||
|
||||
__device__ uint32_t QueuePair::commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes) {
|
||||
uint32_t dbprod = my_sq_prod + num_wqes;
|
||||
|
||||
spin_lock_acquire_shared(&sq_lock, activemask);
|
||||
|
||||
if (is_first_active_lane(activemask) && ((sq_dbprod - dbprod) & (1u << 31))) {
|
||||
sq_dbprod = dbprod;
|
||||
|
||||
ionic_ring_doorbell(dbprod);
|
||||
}
|
||||
|
||||
spin_lock_release_shared(&sq_lock, activemask);
|
||||
|
||||
return dbprod;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::poll_wave_cqes(uint64_t activemask) {
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
uint32_t my_cq_pos = cq_pos + my_logical_lane_id;
|
||||
|
||||
/* Look at the cqe at the current position in the cq buffer */
|
||||
struct ionic_v1_cqe *cqe = &ionic_cq_buf[my_cq_pos & cq_mask];
|
||||
|
||||
/* Determine expected color based on cq wrap count */
|
||||
uint32_t qtf_color_bit = swap_endian_val<uint32_t>(IONIC_V1_CQE_COLOR);
|
||||
uint32_t qtf_color_exp = qtf_color_bit;
|
||||
if (my_cq_pos & (cq_mask + 1)) {
|
||||
qtf_color_exp = 0;
|
||||
}
|
||||
|
||||
/* Check if my cqe color == expected color */
|
||||
uint32_t qtf_be = *(volatile uint32_t *)(&cqe->qid_type_flags);
|
||||
if ((qtf_be & qtf_color_bit) != qtf_color_exp) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
|
||||
|
||||
/* Report if the completion indicates an error. */
|
||||
if (!!(qtf_be & swap_endian_val<uint32_t>(IONIC_V1_CQE_ERROR))) {
|
||||
#if defined(DEBUG)
|
||||
uint32_t qtf = swap_endian_val<uint32_t>(qtf_be);
|
||||
uint32_t qid = qtf >> IONIC_V1_CQE_QID_SHIFT;
|
||||
uint32_t type = (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
|
||||
uint32_t flag = qtf & 0xf;
|
||||
uint32_t status = swap_endian_val<uint32_t>(cqe->status_length);
|
||||
uint64_t npg = cqe->send.npg_wqe_idx_timestamp & IONIC_V1_CQE_WQE_IDX_MASK;
|
||||
|
||||
printf("QUIET ERROR: %s qid %u type %u flag %#x status %u msn %u npg %lu\n",
|
||||
dev_name, qid, type, flag, status, msn, npg);
|
||||
#endif
|
||||
/* No other way to signal an error, so just crash. */
|
||||
abort();
|
||||
}
|
||||
|
||||
/* Only proceed with the furthest ahead cqe to update the sq state */
|
||||
uint64_t my_lane_mask = 1ull << __lane_id();
|
||||
uint64_t lesser_lane_mask = my_lane_mask - 1;
|
||||
if (my_lane_mask != (__ballot(true) & activemask & ~lesser_lane_mask)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* update position in the cq */
|
||||
cq_pos = my_cq_pos + 1;
|
||||
|
||||
/*
|
||||
* Ring cq doorbell frequently enough to avoid cq full.
|
||||
*
|
||||
* NB: IONIC_CQ_GRACE is 100
|
||||
*/
|
||||
if (((cq_pos - cq_dbpos) & cq_mask) >= 100) {
|
||||
cq_dbpos = cq_pos;
|
||||
__atomic_store_n(cq_dbreg, cq_dbval | (cq_mask & cq_dbpos), __ATOMIC_SEQ_CST); //TODO:maybe relaxed?
|
||||
}
|
||||
|
||||
sq_msn = msn;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_quiet_internal(uint64_t activemask, uint32_t cons) {
|
||||
uint32_t greed = 10;
|
||||
|
||||
/* wait for sq_msn to catch up or pass cons. */
|
||||
/* 0x800000 - sign bit for 24-bit fields */
|
||||
while ((sq_msn - cons) & 0x800000) {
|
||||
if (!spin_lock_try_acquire_shared(&cq_lock, activemask)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* with lock acquired, this wave polls cqes until caught up */
|
||||
while ((sq_msn - cons) & 0x800000) {
|
||||
uint32_t old_sq_msn = sq_msn;
|
||||
|
||||
poll_wave_cqes(activemask);
|
||||
|
||||
if (!((sq_msn - cons) & 0x800000)) {
|
||||
if (sq_msn == old_sq_msn) {
|
||||
break;
|
||||
}
|
||||
if (!greed) {
|
||||
break;
|
||||
}
|
||||
--greed;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock_release_shared(&cq_lock, activemask);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_ring_doorbell(uint32_t pos) {
|
||||
// TODO When threads write at once to the same address, not all writes reach the bus.
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
if (__lane_id() == i) {
|
||||
__threadfence();
|
||||
__atomic_store_n(sq_dbreg, sq_dbval | (sq_mask & pos), __ATOMIC_SEQ_CST);
|
||||
}
|
||||
}
|
||||
__threadfence();
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_quiet() {
|
||||
ionic_quiet_internal(get_same_qp_lane_mask(), sq_prod);
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode) {
|
||||
uint64_t activemask = get_same_qp_lane_mask();
|
||||
uint32_t num_wqes = get_active_lane_count(activemask);
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
uint32_t my_sq_prod = reserve_sq(activemask, num_wqes);
|
||||
uint32_t my_sq_pos = my_sq_prod + my_logical_lane_id;
|
||||
struct ionic_v1_wqe *wqe = &ionic_sq_buf[my_sq_pos & sq_mask];
|
||||
uint16_t wqe_flags = 0;
|
||||
|
||||
if (!(my_sq_pos & (sq_mask + 1))) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_COLOR);
|
||||
}
|
||||
|
||||
if (is_last_active_lane(activemask)) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_SIG);
|
||||
}
|
||||
|
||||
// TODO why is this needed?
|
||||
if (size && !laddr && opcode == IONIC_V2_OP_RDMA_WRITE) {
|
||||
size = 1;
|
||||
}
|
||||
|
||||
wqe->base.wqe_idx = my_sq_pos;
|
||||
wqe->base.op = opcode;
|
||||
wqe->base.num_sge_key = size ? 1 : 0;
|
||||
wqe->base.imm_data_key = swap_endian_val<uint32_t>(0);
|
||||
|
||||
wqe->common.rdma.remote_va_high = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr) >> 32);
|
||||
wqe->common.rdma.remote_va_low = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr));
|
||||
wqe->common.rdma.remote_rkey = swap_endian_val<uint32_t>(rkey);
|
||||
wqe->common.length = swap_endian_val<uint32_t>(size);
|
||||
|
||||
if (size) {
|
||||
if (opcode == IONIC_V2_OP_RDMA_WRITE && size <= inline_threshold) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_INL);
|
||||
wqe->base.num_sge_key = 0;
|
||||
if (!laddr) {
|
||||
// TODO why is this needed?
|
||||
wqe->common.pld.data[0] = 1;
|
||||
} else {
|
||||
memcpy(wqe->common.pld.data, laddr, size);
|
||||
}
|
||||
} else {
|
||||
wqe->common.pld.sgl[0].va = swap_endian_val<uint64_t>(reinterpret_cast<uint64_t>(laddr));
|
||||
wqe->common.pld.sgl[0].len = swap_endian_val<uint32_t>(size);
|
||||
wqe->common.pld.sgl[0].lkey = swap_endian_val<uint32_t>(lkey);
|
||||
}
|
||||
}
|
||||
|
||||
__hip_atomic_store(&wqe->base.flags, wqe_flags, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT);
|
||||
|
||||
commit_sq(activemask, my_sq_prod, my_sq_pos, num_wqes);
|
||||
}
|
||||
|
||||
__device__ uint64_t QueuePair::ionic_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode,
|
||||
int64_t atomic_data, int64_t atomic_cmp, bool fetching) {
|
||||
uint64_t activemask = get_same_qp_lane_mask();
|
||||
uint32_t num_wqes = get_active_lane_count(activemask);
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
uint32_t my_sq_prod = reserve_sq(activemask, num_wqes);
|
||||
uint32_t my_sq_pos = my_sq_prod + my_logical_lane_id;
|
||||
struct ionic_v1_wqe *wqe = &ionic_sq_buf[my_sq_pos & sq_mask];
|
||||
uint16_t wqe_flags = 0;
|
||||
uint32_t cons;
|
||||
|
||||
uint64_t* wave_fetch_atomic{nullptr};
|
||||
if (fetching) {
|
||||
if (is_leader) {
|
||||
auto res = fetching_atomic_freelist->pop_front();
|
||||
while (!res.success) {
|
||||
res = fetching_atomic_freelist->pop_front();
|
||||
}
|
||||
wave_fetch_atomic = res.value;
|
||||
}
|
||||
wave_fetch_atomic = (uint64_t*)__shfl((uint64_t)wave_fetch_atomic, leader_phys_lane_id);
|
||||
}
|
||||
|
||||
if (!(my_sq_pos & (sq_mask + 1))) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_COLOR);
|
||||
}
|
||||
|
||||
if (is_last_active_lane(activemask)) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_SIG);
|
||||
}
|
||||
|
||||
wqe->base.wqe_idx = my_sq_pos;
|
||||
wqe->base.op = opcode;
|
||||
wqe->base.num_sge_key = 1;
|
||||
wqe->base.imm_data_key = swap_endian_val<uint32_t>(0);
|
||||
|
||||
wqe->atomic_v2.remote_va_high = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr) >> 32);
|
||||
wqe->atomic_v2.remote_va_low = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr));
|
||||
wqe->atomic_v2.remote_rkey = swap_endian_val<uint32_t>(rkey);
|
||||
wqe->atomic_v2.swap_add_high = swap_endian_val<uint32_t>(atomic_data >> 32);
|
||||
wqe->atomic_v2.swap_add_low = swap_endian_val<uint32_t>(atomic_data);
|
||||
wqe->atomic_v2.compare_high = swap_endian_val<uint32_t>(atomic_cmp >> 32);
|
||||
wqe->atomic_v2.compare_low = swap_endian_val<uint32_t>(atomic_cmp);
|
||||
|
||||
if (fetching) {
|
||||
wqe->atomic_v2.local_va = swap_endian_val<uint64_t>(reinterpret_cast<uint64_t>(wave_fetch_atomic + my_logical_lane_id));
|
||||
wqe->atomic_v2.lkey = swap_endian_val<uint32_t>(fetching_atomic_lkey);
|
||||
} else {
|
||||
wqe->atomic_v2.local_va = swap_endian_val<uint64_t>(reinterpret_cast<uint64_t>(nonfetching_atomic));
|
||||
wqe->atomic_v2.lkey = swap_endian_val<uint32_t>(nonfetching_atomic_lkey);
|
||||
}
|
||||
|
||||
__hip_atomic_store(&wqe->base.flags, wqe_flags, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT);
|
||||
|
||||
cons = commit_sq(activemask, my_sq_prod, my_sq_pos, num_wqes);
|
||||
|
||||
uint64_t ret{0};
|
||||
if (fetching) {
|
||||
ionic_quiet_internal(activemask, cons);
|
||||
ret = wave_fetch_atomic[my_logical_lane_id];
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
if (is_leader) {
|
||||
fetching_atomic_freelist->push_back(wave_fetch_atomic);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
@@ -0,0 +1,37 @@
|
||||
###############################################################################
|
||||
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
backend_gda_mlx5.cpp
|
||||
)
|
||||
|
||||
if(GDA_MLX5)
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
queue_pair_mlx5.cpp
|
||||
segment_builder.cpp
|
||||
)
|
||||
endif()
|
||||
@@ -0,0 +1,122 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "gda/backend_gda.hpp"
|
||||
#include "util.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
void* GDABackend::mlx5_dv_dlopen() {
|
||||
void* dv_handle{nullptr};
|
||||
dv_handle = dlopen("libmlx5.so", RTLD_NOW);
|
||||
if (!dv_handle) {
|
||||
DPRINTF("Could not open libmlx5.so. Returning\n");
|
||||
}
|
||||
return dv_handle;
|
||||
}
|
||||
|
||||
int GDABackend::mlx5_dv_dl_init() {
|
||||
mlx5dv_handle_ = mlx5_dv_dlopen();
|
||||
if (!mlx5dv_handle_)
|
||||
return ROCSHMEM_ERROR;
|
||||
|
||||
DLSYM_HELPER(mlx5dv, mlx5dv_, mlx5dv_handle_, init_obj);
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
void GDABackend::mlx5_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
mlx5dv_cq cq_out;
|
||||
mlx5dv_obj mlx_obj;
|
||||
mlx_obj.cq.in = cqs[conn_num];
|
||||
mlx_obj.cq.out = &cq_out;
|
||||
mlx5dv.init_obj(&mlx_obj, MLX5DV_OBJ_CQ);
|
||||
dump_mlx5dv_cq(&cq_out, conn_num);
|
||||
|
||||
/*
|
||||
* struct mlx5dv_cq {
|
||||
* void *buf;
|
||||
* __be32 *dbrec;
|
||||
* uint32_t cqe_cnt;
|
||||
* uint32_t cqe_size;
|
||||
* void *cq_uar;
|
||||
* uint32_t cqn;
|
||||
* uint64_t comp_mask;
|
||||
* };
|
||||
*/
|
||||
|
||||
gpu_qp->cq_buf = reinterpret_cast<mlx5_cqe64*>(cq_out.buf);
|
||||
gpu_qp->cq_cnt = cq_out.cqe_cnt;
|
||||
gpu_qp->cq_log_cnt = log2(cq_out.cqe_cnt);
|
||||
gpu_qp->cq_dbrec = cq_out.dbrec;
|
||||
|
||||
mlx5dv_qp qp_out;
|
||||
mlx_obj.qp.in = qps[conn_num];
|
||||
mlx_obj.qp.out = &qp_out;
|
||||
mlx5dv.init_obj(&mlx_obj, MLX5DV_OBJ_QP);
|
||||
dump_mlx5dv_qp(&qp_out, conn_num);
|
||||
|
||||
/*
|
||||
* struct mlx5dv_qp {
|
||||
* __be32 *dbrec;
|
||||
* struct {
|
||||
* void *buf;
|
||||
* uint32_t wqe_cnt;
|
||||
* uint32_t stride;
|
||||
* } sq;
|
||||
* struct {
|
||||
* void *buf;
|
||||
* uint32_t wqe_cnt;
|
||||
* uint32_t stride;
|
||||
* } rq;
|
||||
* struct {
|
||||
* void *reg;
|
||||
* uint32_t size;
|
||||
* } bf;
|
||||
* uint64_t comp_mask;
|
||||
* off_t uar_mmap_offset;
|
||||
* uint32_t tirn;
|
||||
* uint32_t tisn;
|
||||
* uint32_t rqn;
|
||||
* uint32_t sqn;
|
||||
* uint64_t tir_icm_addr;
|
||||
* };
|
||||
*/
|
||||
|
||||
gpu_qp->dbrec = &qp_out.dbrec[1]; // points to two pointers: 0 -> MLX5_REC_DBR, 1 -> MLX5_SND_DBR
|
||||
gpu_qp->sq_buf = reinterpret_cast<uint64_t*>(qp_out.sq.buf);
|
||||
gpu_qp->sq_wqe_cnt = qp_out.sq.wqe_cnt;
|
||||
gpu_qp->rkey = htobe32(heap_rkey[conn_num % num_pes]);
|
||||
gpu_qp->lkey = htobe32(heap_mr->lkey);
|
||||
gpu_qp->qp_num = qps[conn_num]->qp_num;
|
||||
gpu_qp->inline_threshold = inline_threshold;
|
||||
// The 2 in qp_out.bf.size * 2 below facilitates the switching between blue flame registers
|
||||
|
||||
int hip_dev_id{-1};
|
||||
CHECK_HIP(hipGetDevice(&hip_dev_id));
|
||||
void* gpu_ptr{nullptr};
|
||||
rocm_memory_lock_to_fine_grain(qp_out.bf.reg, qp_out.bf.size * 2, &gpu_ptr, hip_dev_id);
|
||||
gpu_qp->db.ptr = reinterpret_cast<uint64_t*>(gpu_ptr);
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
@@ -34,4 +34,8 @@ typedef union db_reg {
|
||||
uintptr_t uint;
|
||||
} db_reg_t;
|
||||
|
||||
struct mlx5dv_funcs_t {
|
||||
int (*init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
|
||||
};
|
||||
|
||||
#endif //LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_
|
||||
|
||||
@@ -0,0 +1,273 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "gda/queue_pair.hpp"
|
||||
#include "util.hpp"
|
||||
#include "containers/free_list_impl.hpp"
|
||||
#include "gda/endian.hpp"
|
||||
#include "segment_builder.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
__device__ void QueuePair::mlx5_ring_doorbell(uint64_t db_val, uint64_t my_sq_counter) {
|
||||
swap_endian_store(const_cast<uint32_t*>(dbrec), (uint32_t)my_sq_counter);
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
__hip_atomic_store(db.ptr, db_val, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
uint64_t db_uint = __hip_atomic_load(&db.uint, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
db_uint ^= 0x100;
|
||||
__hip_atomic_store(&db.uint, db_uint, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
|
||||
__device__ void QueuePair::mlx5_quiet() {
|
||||
constexpr size_t BROADCAST_SIZE = 1024 / WF_SIZE;
|
||||
__shared__ uint64_t wqe_broadcast[BROADCAST_SIZE];
|
||||
uint8_t wavefront_id = get_flat_block_id() / WF_SIZE;
|
||||
wqe_broadcast[wavefront_id] = 0;
|
||||
|
||||
uint64_t activemask = get_active_lane_mask();
|
||||
uint8_t num_active_lanes = get_active_lane_count(activemask);
|
||||
uint8_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
|
||||
while (true) {
|
||||
bool done{false};
|
||||
uint64_t quiet_amount{0};
|
||||
uint64_t wave_cq_consumer{0};
|
||||
while (!done) {
|
||||
uint64_t active = __hip_atomic_load(&quiet_active, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t posted = __hip_atomic_load(&quiet_posted, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t completed = __hip_atomic_load(&quiet_completed, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
if (!(posted - completed)) {
|
||||
return;
|
||||
}
|
||||
int64_t quiet_val = posted - active;
|
||||
if (quiet_val <= 0) {
|
||||
continue;
|
||||
}
|
||||
quiet_amount = min(num_active_lanes, quiet_val);
|
||||
if (is_leader) {
|
||||
done = __hip_atomic_compare_exchange_strong(&quiet_active, &active, active + quiet_amount, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
if (done) {
|
||||
wave_cq_consumer = __hip_atomic_fetch_add(&cq_consumer, quiet_amount, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
}
|
||||
done = __shfl(done, leader_phys_lane_id);
|
||||
}
|
||||
wave_cq_consumer = __shfl(wave_cq_consumer, leader_phys_lane_id);
|
||||
uint64_t my_cq_consumer = wave_cq_consumer + my_logical_lane_id;
|
||||
uint64_t my_cq_index = my_cq_consumer % cq_cnt;
|
||||
|
||||
if (my_logical_lane_id < quiet_amount) {
|
||||
volatile mlx5_cqe64 *cqe_entry = &cq_buf[my_cq_index];
|
||||
uint16_t be_wqe_counter{0};
|
||||
uint8_t op_own{0};
|
||||
uint8_t owner_bit = (my_cq_consumer >> cq_log_cnt) & 1;
|
||||
bool vote_failed{true};
|
||||
|
||||
while (vote_failed) {
|
||||
op_own = *((volatile uint8_t*)&cqe_entry->op_own);
|
||||
bool my_ownership_vote = (op_own & 1) == owner_bit;
|
||||
bool my_opcode_vote = (op_own >> 4) != MLX5_CQE_INVALID;
|
||||
uint64_t votes = __ballot(my_ownership_vote && my_opcode_vote);
|
||||
vote_failed = __popcll(votes) < quiet_amount;
|
||||
if (!vote_failed) {
|
||||
be_wqe_counter = *((volatile uint16_t*)&cqe_entry->wqe_counter);
|
||||
}
|
||||
}
|
||||
|
||||
uint16_t wqe_counter;
|
||||
swap_endian_store(const_cast<uint16_t*>(&wqe_counter), reinterpret_cast<uint16_t>(be_wqe_counter));
|
||||
uint64_t wqe_id = outstanding_wqes[wqe_counter];
|
||||
__hip_atomic_fetch_max(&wqe_broadcast[wavefront_id], wqe_id, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
uint8_t mlx5_invld_bits = MLX5_CQE_INVALID << 4 | owner_bit;
|
||||
*((volatile uint8_t*)&cqe_entry->op_own) = mlx5_invld_bits;
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
}
|
||||
if (is_leader) {
|
||||
uint64_t completed {0};
|
||||
do {
|
||||
completed = __hip_atomic_load(&quiet_completed, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (completed != wave_cq_consumer);
|
||||
|
||||
swap_endian_store(const_cast<uint32_t*>(cq_dbrec), (uint32_t)(wave_cq_consumer + quiet_amount));
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
uint64_t sunk_wqe_id = wqe_broadcast[wavefront_id];
|
||||
__hip_atomic_fetch_max(&sq_sunk, sunk_wqe_id, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
__hip_atomic_fetch_add(&quiet_completed, quiet_amount, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void QueuePair::mlx5_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode) {
|
||||
uint64_t activemask = get_active_lane_mask();
|
||||
uint8_t num_active_lanes = get_active_lane_count(activemask);
|
||||
uint8_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
uint8_t num_wqes{num_active_lanes};
|
||||
uint64_t wave_sq_counter{0};
|
||||
|
||||
if (is_leader) {
|
||||
wave_sq_counter = __hip_atomic_fetch_add(&sq_posted, num_wqes, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
wave_sq_counter = __shfl(wave_sq_counter, leader_phys_lane_id);
|
||||
uint64_t my_sq_counter = wave_sq_counter + my_logical_lane_id;
|
||||
uint64_t my_sq_index = my_sq_counter % sq_wqe_cnt;
|
||||
|
||||
while (true) {
|
||||
uint64_t db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t sunk = __hip_atomic_load(&sq_sunk, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
int64_t num_active_sq_entries = db_touched - sunk;
|
||||
if (num_active_sq_entries < 0) {
|
||||
continue;
|
||||
}
|
||||
uint64_t num_free_entries = min(sq_wqe_cnt, cq_cnt) - num_active_sq_entries;
|
||||
uint64_t num_entries_until_wave_last_entry = wave_sq_counter + num_active_lanes - db_touched;
|
||||
if (num_free_entries > num_entries_until_wave_last_entry) {
|
||||
break;
|
||||
}
|
||||
mlx5_quiet();
|
||||
}
|
||||
|
||||
outstanding_wqes[my_sq_counter % OUTSTANDING_TABLE_SIZE] = my_sq_counter;
|
||||
|
||||
SegmentBuilder seg_build(my_sq_index, sq_buf);
|
||||
seg_build.update_ctrl_seg(my_sq_counter, opcode, 0, qp_num, MLX5_WQE_CTRL_CQ_UPDATE, 3, 0, 0);
|
||||
seg_build.update_raddr_seg(raddr, rkey);
|
||||
|
||||
if (size <= inline_threshold && opcode == gda_op_rdma_write) {
|
||||
seg_build.update_inl_data_seg(laddr, size);
|
||||
} else {
|
||||
seg_build.update_data_seg(laddr, size, lkey);
|
||||
}
|
||||
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
if (is_leader) {
|
||||
uint64_t db_touched {0};
|
||||
do {
|
||||
db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (db_touched != wave_sq_counter);
|
||||
|
||||
uint8_t *base_ptr = reinterpret_cast<uint8_t*>(sq_buf);
|
||||
uint64_t* ctrl_wqe_8B_for_db = reinterpret_cast<uint64_t*>(&base_ptr[64 * ((wave_sq_counter + num_wqes - 1) % sq_wqe_cnt)]);
|
||||
mlx5_ring_doorbell(*ctrl_wqe_8B_for_db, wave_sq_counter + num_wqes);
|
||||
|
||||
__hip_atomic_fetch_add(&quiet_posted, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
__hip_atomic_store(&sq_db_touched, wave_sq_counter + num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
}
|
||||
|
||||
__device__ uint64_t QueuePair::mlx5_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode,
|
||||
int64_t atomic_data, int64_t atomic_cmp, bool fetching) {
|
||||
uint64_t activemask = get_active_lane_mask();
|
||||
uint8_t num_active_lanes = get_active_lane_count(activemask);
|
||||
uint8_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
uint8_t num_wqes{num_active_lanes};
|
||||
uint64_t wave_sq_counter{0};
|
||||
|
||||
if (is_leader) {
|
||||
wave_sq_counter = __hip_atomic_fetch_add(&sq_posted, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
wave_sq_counter = __shfl(wave_sq_counter, leader_phys_lane_id);
|
||||
uint64_t my_sq_counter = wave_sq_counter + my_logical_lane_id;
|
||||
uint64_t my_sq_index = my_sq_counter % sq_wqe_cnt;
|
||||
|
||||
while (true) {
|
||||
uint64_t db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t sunk = __hip_atomic_load(&sq_sunk, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
int64_t num_active_sq_entries = db_touched - sunk;
|
||||
if (num_active_sq_entries < 0) {
|
||||
continue;
|
||||
}
|
||||
uint64_t num_free_entries = min(sq_wqe_cnt, cq_cnt) - num_active_sq_entries;
|
||||
uint64_t num_entries_until_wave_last_entry = wave_sq_counter + num_active_lanes - db_touched;
|
||||
if (num_free_entries > num_entries_until_wave_last_entry) {
|
||||
break;
|
||||
}
|
||||
mlx5_quiet();
|
||||
}
|
||||
|
||||
uint64_t* wave_fetch_atomic{nullptr};
|
||||
if (fetching) {
|
||||
if (is_leader) {
|
||||
uint64_t db_touched {0};
|
||||
do {
|
||||
db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (db_touched != wave_sq_counter);
|
||||
|
||||
auto res = fetching_atomic_freelist->pop_front();
|
||||
while (!res.success) {
|
||||
res = fetching_atomic_freelist->pop_front();
|
||||
}
|
||||
wave_fetch_atomic = res.value;
|
||||
}
|
||||
wave_fetch_atomic = (uint64_t*)__shfl((uint64_t)wave_fetch_atomic, leader_phys_lane_id);
|
||||
}
|
||||
|
||||
outstanding_wqes[my_sq_counter % OUTSTANDING_TABLE_SIZE] = my_sq_counter;
|
||||
|
||||
SegmentBuilder seg_build(my_sq_index, sq_buf);
|
||||
seg_build.update_ctrl_seg(my_sq_counter, opcode, 0, qp_num, MLX5_WQE_CTRL_CQ_UPDATE, 4, 0, 0);
|
||||
seg_build.update_raddr_seg(raddr, rkey);
|
||||
seg_build.update_atomic_seg(atomic_data, atomic_cmp);
|
||||
if (fetching) {
|
||||
seg_build.update_data_seg(wave_fetch_atomic + my_logical_lane_id, 8, fetching_atomic_lkey);
|
||||
} else {
|
||||
seg_build.update_data_seg(nonfetching_atomic, 8, nonfetching_atomic_lkey);
|
||||
}
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
if (is_leader) {
|
||||
uint64_t db_touched {0};
|
||||
do {
|
||||
db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (db_touched != wave_sq_counter);
|
||||
|
||||
uint8_t *base_ptr = reinterpret_cast<uint8_t*>(sq_buf);
|
||||
uint64_t* ctrl_wqe_8B_for_db = reinterpret_cast<uint64_t*>(&base_ptr[64 * ((wave_sq_counter + num_wqes - 1) % sq_wqe_cnt)]);
|
||||
mlx5_ring_doorbell(*ctrl_wqe_8B_for_db, wave_sq_counter + num_wqes);
|
||||
|
||||
__hip_atomic_fetch_add(&quiet_posted, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
__hip_atomic_store(&sq_db_touched, wave_sq_counter + num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
|
||||
uint64_t ret{0};
|
||||
if (fetching) {
|
||||
mlx5_quiet();
|
||||
ret = wave_fetch_atomic[my_logical_lane_id];
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
if (is_leader) {
|
||||
fetching_atomic_freelist->push_back(wave_fetch_atomic);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace rocshmem
|
||||
+1
-1
@@ -25,7 +25,7 @@
|
||||
#include "segment_builder.hpp"
|
||||
|
||||
#include "util.hpp"
|
||||
#include "endian.hpp"
|
||||
#include "gda/endian.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
@@ -27,8 +27,6 @@
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "backend_gda.hpp"
|
||||
#include "endian.hpp"
|
||||
#include "segment_builder.hpp"
|
||||
#include "constants.hpp"
|
||||
|
||||
namespace rocshmem {
|
||||
@@ -66,22 +64,33 @@ QueuePair::QueuePair(struct ibv_pd* pd, int gda_vendor) {
|
||||
}
|
||||
|
||||
/* Set Correct opcodes for each NIC */
|
||||
switch (gda_vendor) {
|
||||
#if defined(GDA_IONIC)
|
||||
gda_op_rdma_write = IONIC_V2_OP_RDMA_WRITE;
|
||||
gda_op_rdma_read = IONIC_V2_OP_RDMA_READ;
|
||||
gda_op_atomic_fa = IONIC_V2_OP_ATOMIC_FA;
|
||||
gda_op_atomic_cs = IONIC_V2_OP_ATOMIC_CS;
|
||||
#endif
|
||||
if (gda_vendor == GDAVendor::BNXT) {
|
||||
case GDAVendor::IONIC:
|
||||
gda_op_rdma_write = IONIC_V2_OP_RDMA_WRITE;
|
||||
gda_op_rdma_read = IONIC_V2_OP_RDMA_READ;
|
||||
gda_op_atomic_fa = IONIC_V2_OP_ATOMIC_FA;
|
||||
gda_op_atomic_cs = IONIC_V2_OP_ATOMIC_CS;
|
||||
break;
|
||||
#endif //defined(GDA_IONIC)
|
||||
#if defined(GDA_BNXT)
|
||||
case GDAVendor::BNXT:
|
||||
gda_op_rdma_write = BNXT_RE_WR_OPCD_RDMA_WRITE;
|
||||
gda_op_rdma_read = BNXT_RE_WR_OPCD_RDMA_READ;
|
||||
gda_op_atomic_fa = BNXT_RE_WR_OPCD_ATOMIC_FA;
|
||||
gda_op_atomic_cs = BNXT_RE_WR_OPCD_ATOMIC_CS;
|
||||
} else if (gda_vendor == GDAVendor::MLX5) {
|
||||
break;
|
||||
#endif //defined(GDA_BNXT)
|
||||
#if defined(GDA_MLX5)
|
||||
case GDAVendor::MLX5:
|
||||
gda_op_rdma_write = MLX5_OPCODE_RDMA_WRITE;
|
||||
gda_op_rdma_read = MLX5_OPCODE_RDMA_READ;
|
||||
gda_op_atomic_fa = MLX5_OPCODE_ATOMIC_FA;
|
||||
gda_op_atomic_cs = MLX5_OPCODE_ATOMIC_CS;
|
||||
break;
|
||||
#endif //defined(GDA_MLX5)
|
||||
default:
|
||||
assert(false /* invalid nic provider */);
|
||||
}
|
||||
gda_vendor_ = gda_vendor;
|
||||
}
|
||||
@@ -105,264 +114,6 @@ QueuePair::~QueuePair() {
|
||||
/******************************************************************************
|
||||
************************ PROVIDER-SPECIFIC HELPERS ***************************
|
||||
*****************************************************************************/
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ uint64_t QueuePair::get_same_qp_lane_mask() {
|
||||
uint64_t lane_mask = get_active_lane_mask();
|
||||
uintptr_t this_val = reinterpret_cast<uintptr_t>(this);
|
||||
|
||||
// exclude threads operating on a different qp from this thread lane mask
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
uint64_t bit_i = 1ull << i;
|
||||
if ((lane_mask & bit_i) && __shfl(this_val, i) != this_val) {
|
||||
lane_mask &= ~bit_i;
|
||||
}
|
||||
}
|
||||
|
||||
return lane_mask;
|
||||
}
|
||||
|
||||
__device__ uint32_t QueuePair::reserve_sq(uint64_t activemask, uint32_t num_wqes) {
|
||||
uint32_t my_sq_prod = 0;
|
||||
|
||||
// reserve space for wqes in sq
|
||||
if (is_first_active_lane(activemask)) {
|
||||
my_sq_prod = __hip_atomic_fetch_add(&sq_prod, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
my_sq_prod = __shfl(my_sq_prod, get_first_active_lane_id(activemask));
|
||||
|
||||
// wait for that space to be available
|
||||
ionic_quiet_internal(activemask, my_sq_prod + num_wqes - sq_mask);
|
||||
|
||||
return my_sq_prod;
|
||||
}
|
||||
|
||||
__device__ uint32_t QueuePair::commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes) {
|
||||
uint32_t dbprod = my_sq_prod + num_wqes;
|
||||
|
||||
spin_lock_acquire_shared(&sq_lock, activemask);
|
||||
|
||||
if (is_first_active_lane(activemask) && ((sq_dbprod - dbprod) & (1u << 31))) {
|
||||
sq_dbprod = dbprod;
|
||||
|
||||
ionic_ring_doorbell(dbprod);
|
||||
}
|
||||
|
||||
spin_lock_release_shared(&sq_lock, activemask);
|
||||
|
||||
return dbprod;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::poll_wave_cqes(uint64_t activemask) {
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
uint32_t my_cq_pos = cq_pos + my_logical_lane_id;
|
||||
|
||||
/* Look at the cqe at the current position in the cq buffer */
|
||||
struct ionic_v1_cqe *cqe = &ionic_cq_buf[my_cq_pos & cq_mask];
|
||||
|
||||
/* Determine expected color based on cq wrap count */
|
||||
uint32_t qtf_color_bit = swap_endian_val<uint32_t>(IONIC_V1_CQE_COLOR);
|
||||
uint32_t qtf_color_exp = qtf_color_bit;
|
||||
if (my_cq_pos & (cq_mask + 1)) {
|
||||
qtf_color_exp = 0;
|
||||
}
|
||||
|
||||
/* Check if my cqe color == expected color */
|
||||
uint32_t qtf_be = *(volatile uint32_t *)(&cqe->qid_type_flags);
|
||||
if ((qtf_be & qtf_color_bit) != qtf_color_exp) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
|
||||
|
||||
/* Report if the completion indicates an error. */
|
||||
if (!!(qtf_be & swap_endian_val<uint32_t>(IONIC_V1_CQE_ERROR))) {
|
||||
#if defined(DEBUG)
|
||||
uint32_t qtf = swap_endian_val<uint32_t>(qtf_be);
|
||||
uint32_t qid = qtf >> IONIC_V1_CQE_QID_SHIFT;
|
||||
uint32_t type = (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
|
||||
uint32_t flag = qtf & 0xf;
|
||||
uint32_t status = swap_endian_val<uint32_t>(cqe->status_length);
|
||||
uint64_t npg = cqe->send.npg_wqe_idx_timestamp & IONIC_V1_CQE_WQE_IDX_MASK;
|
||||
|
||||
printf("QUIET ERROR: %s qid %u type %u flag %#x status %u msn %u npg %lu\n",
|
||||
dev_name, qid, type, flag, status, msn, npg);
|
||||
#endif
|
||||
/* No other way to signal an error, so just crash. */
|
||||
abort();
|
||||
}
|
||||
|
||||
/* Only proceed with the furthest ahead cqe to update the sq state */
|
||||
uint64_t my_lane_mask = 1ull << __lane_id();
|
||||
uint64_t lesser_lane_mask = my_lane_mask - 1;
|
||||
if (my_lane_mask != (__ballot(true) & activemask & ~lesser_lane_mask)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* update position in the cq */
|
||||
cq_pos = my_cq_pos + 1;
|
||||
|
||||
/*
|
||||
* Ring cq doorbell frequently enough to avoid cq full.
|
||||
*
|
||||
* NB: IONIC_CQ_GRACE is 100
|
||||
*/
|
||||
if (((cq_pos - cq_dbpos) & cq_mask) >= 100) {
|
||||
cq_dbpos = cq_pos;
|
||||
__atomic_store_n(cq_dbreg, cq_dbval | (cq_mask & cq_dbpos), __ATOMIC_SEQ_CST); //TODO:maybe relaxed?
|
||||
}
|
||||
|
||||
sq_msn = msn;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_quiet_internal(uint64_t activemask, uint32_t cons) {
|
||||
uint32_t greed = 10;
|
||||
|
||||
/* wait for sq_msn to catch up or pass cons. */
|
||||
/* 0x800000 - sign bit for 24-bit fields */
|
||||
while ((sq_msn - cons) & 0x800000) {
|
||||
if (!spin_lock_try_acquire_shared(&cq_lock, activemask)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* with lock acquired, this wave polls cqes until caught up */
|
||||
while ((sq_msn - cons) & 0x800000) {
|
||||
uint32_t old_sq_msn = sq_msn;
|
||||
|
||||
poll_wave_cqes(activemask);
|
||||
|
||||
if (!((sq_msn - cons) & 0x800000)) {
|
||||
if (sq_msn == old_sq_msn) {
|
||||
break;
|
||||
}
|
||||
if (!greed) {
|
||||
break;
|
||||
}
|
||||
--greed;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock_release_shared(&cq_lock, activemask);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif // GDA_IONIC
|
||||
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ void QueuePair::ionic_ring_doorbell(uint32_t pos) {
|
||||
// TODO When threads write at once to the same address, not all writes reach the bus.
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
if (__lane_id() == i) {
|
||||
__threadfence();
|
||||
__atomic_store_n(sq_dbreg, sq_dbval | (sq_mask & pos), __ATOMIC_SEQ_CST);
|
||||
}
|
||||
}
|
||||
__threadfence();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(GDA_MLX5)
|
||||
__device__ void QueuePair::mlx5_ring_doorbell(uint64_t db_val, uint64_t my_sq_counter) {
|
||||
swap_endian_store(const_cast<uint32_t*>(dbrec), (uint32_t)my_sq_counter);
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
__hip_atomic_store(db.ptr, db_val, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SYSTEM);
|
||||
uint64_t db_uint = __hip_atomic_load(&db.uint, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
db_uint ^= 0x100;
|
||||
__hip_atomic_store(&db.uint, db_uint, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
#endif // GDA_MLX5
|
||||
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ void QueuePair::ionic_quiet() {
|
||||
ionic_quiet_internal(get_same_qp_lane_mask(), sq_prod);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(GDA_MLX5)
|
||||
__device__ void QueuePair::mlx5_quiet() {
|
||||
constexpr size_t BROADCAST_SIZE = 1024 / WF_SIZE;
|
||||
__shared__ uint64_t wqe_broadcast[BROADCAST_SIZE];
|
||||
uint8_t wavefront_id = get_flat_block_id() / WF_SIZE;
|
||||
wqe_broadcast[wavefront_id] = 0;
|
||||
|
||||
uint64_t activemask = get_active_lane_mask();
|
||||
uint8_t num_active_lanes = get_active_lane_count(activemask);
|
||||
uint8_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
|
||||
while (true) {
|
||||
bool done{false};
|
||||
uint64_t quiet_amount{0};
|
||||
uint64_t wave_cq_consumer{0};
|
||||
while (!done) {
|
||||
uint64_t active = __hip_atomic_load(&quiet_active, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t posted = __hip_atomic_load(&quiet_posted, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t completed = __hip_atomic_load(&quiet_completed, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
if (!(posted - completed)) {
|
||||
return;
|
||||
}
|
||||
int64_t quiet_val = posted - active;
|
||||
if (quiet_val <= 0) {
|
||||
continue;
|
||||
}
|
||||
quiet_amount = min(num_active_lanes, quiet_val);
|
||||
if (is_leader) {
|
||||
done = __hip_atomic_compare_exchange_strong(&quiet_active, &active, active + quiet_amount, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
if (done) {
|
||||
wave_cq_consumer = __hip_atomic_fetch_add(&cq_consumer, quiet_amount, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
}
|
||||
done = __shfl(done, leader_phys_lane_id);
|
||||
}
|
||||
wave_cq_consumer = __shfl(wave_cq_consumer, leader_phys_lane_id);
|
||||
uint64_t my_cq_consumer = wave_cq_consumer + my_logical_lane_id;
|
||||
uint64_t my_cq_index = my_cq_consumer % cq_cnt;
|
||||
|
||||
if (my_logical_lane_id < quiet_amount) {
|
||||
volatile mlx5_cqe64 *cqe_entry = &cq_buf[my_cq_index];
|
||||
uint16_t be_wqe_counter{0};
|
||||
uint8_t op_own{0};
|
||||
uint8_t owner_bit = (my_cq_consumer >> cq_log_cnt) & 1;
|
||||
bool vote_failed{true};
|
||||
|
||||
while (vote_failed) {
|
||||
op_own = *((volatile uint8_t*)&cqe_entry->op_own);
|
||||
bool my_ownership_vote = (op_own & 1) == owner_bit;
|
||||
bool my_opcode_vote = (op_own >> 4) != MLX5_CQE_INVALID;
|
||||
uint64_t votes = __ballot(my_ownership_vote && my_opcode_vote);
|
||||
vote_failed = __popcll(votes) < quiet_amount;
|
||||
if (!vote_failed) {
|
||||
be_wqe_counter = *((volatile uint16_t*)&cqe_entry->wqe_counter);
|
||||
}
|
||||
}
|
||||
|
||||
uint16_t wqe_counter;
|
||||
swap_endian_store(const_cast<uint16_t*>(&wqe_counter), reinterpret_cast<uint16_t>(be_wqe_counter));
|
||||
uint64_t wqe_id = outstanding_wqes[wqe_counter];
|
||||
__hip_atomic_fetch_max(&wqe_broadcast[wavefront_id], wqe_id, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
|
||||
uint8_t mlx5_invld_bits = MLX5_CQE_INVALID << 4 | owner_bit;
|
||||
*((volatile uint8_t*)&cqe_entry->op_own) = mlx5_invld_bits;
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
}
|
||||
if (is_leader) {
|
||||
uint64_t completed {0};
|
||||
do {
|
||||
completed = __hip_atomic_load(&quiet_completed, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (completed != wave_cq_consumer);
|
||||
|
||||
swap_endian_store(const_cast<uint32_t*>(cq_dbrec), (uint32_t)(wave_cq_consumer + quiet_amount));
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
uint64_t sunk_wqe_id = wqe_broadcast[wavefront_id];
|
||||
__hip_atomic_fetch_max(&sq_sunk, sunk_wqe_id, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
__hip_atomic_fetch_add(&quiet_completed, quiet_amount, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // GDA_MLX5
|
||||
|
||||
__device__ void QueuePair::post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode) {
|
||||
switch (gda_vendor_) {
|
||||
#if defined(GDA_MLX5)
|
||||
@@ -428,286 +179,6 @@ __device__ void QueuePair::quiet() {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ void QueuePair::ionic_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode) {
|
||||
uint64_t activemask = get_same_qp_lane_mask();
|
||||
uint32_t num_wqes = get_active_lane_count(activemask);
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
uint32_t my_sq_prod = reserve_sq(activemask, num_wqes);
|
||||
uint32_t my_sq_pos = my_sq_prod + my_logical_lane_id;
|
||||
struct ionic_v1_wqe *wqe = &ionic_sq_buf[my_sq_pos & sq_mask];
|
||||
uint16_t wqe_flags = 0;
|
||||
|
||||
if (!(my_sq_pos & (sq_mask + 1))) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_COLOR);
|
||||
}
|
||||
|
||||
if (is_last_active_lane(activemask)) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_SIG);
|
||||
}
|
||||
|
||||
// TODO why is this needed?
|
||||
if (size && !laddr && opcode == IONIC_V2_OP_RDMA_WRITE) {
|
||||
size = 1;
|
||||
}
|
||||
|
||||
wqe->base.wqe_idx = my_sq_pos;
|
||||
wqe->base.op = opcode;
|
||||
wqe->base.num_sge_key = size ? 1 : 0;
|
||||
wqe->base.imm_data_key = swap_endian_val<uint32_t>(0);
|
||||
|
||||
wqe->common.rdma.remote_va_high = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr) >> 32);
|
||||
wqe->common.rdma.remote_va_low = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr));
|
||||
wqe->common.rdma.remote_rkey = swap_endian_val<uint32_t>(rkey);
|
||||
wqe->common.length = swap_endian_val<uint32_t>(size);
|
||||
|
||||
if (size) {
|
||||
if (opcode == IONIC_V2_OP_RDMA_WRITE && size <= inline_threshold) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_INL);
|
||||
wqe->base.num_sge_key = 0;
|
||||
if (!laddr) {
|
||||
// TODO why is this needed?
|
||||
wqe->common.pld.data[0] = 1;
|
||||
} else {
|
||||
memcpy(wqe->common.pld.data, laddr, size);
|
||||
}
|
||||
} else {
|
||||
wqe->common.pld.sgl[0].va = swap_endian_val<uint64_t>(reinterpret_cast<uint64_t>(laddr));
|
||||
wqe->common.pld.sgl[0].len = swap_endian_val<uint32_t>(size);
|
||||
wqe->common.pld.sgl[0].lkey = swap_endian_val<uint32_t>(lkey);
|
||||
}
|
||||
}
|
||||
|
||||
__hip_atomic_store(&wqe->base.flags, wqe_flags, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT);
|
||||
|
||||
commit_sq(activemask, my_sq_prod, my_sq_pos, num_wqes);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (GDA_MLX5)
|
||||
__device__ void QueuePair::mlx5_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode) {
|
||||
uint64_t activemask = get_active_lane_mask();
|
||||
uint8_t num_active_lanes = get_active_lane_count(activemask);
|
||||
uint8_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
uint8_t num_wqes{num_active_lanes};
|
||||
uint64_t wave_sq_counter{0};
|
||||
|
||||
if (is_leader) {
|
||||
wave_sq_counter = __hip_atomic_fetch_add(&sq_posted, num_wqes, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
wave_sq_counter = __shfl(wave_sq_counter, leader_phys_lane_id);
|
||||
uint64_t my_sq_counter = wave_sq_counter + my_logical_lane_id;
|
||||
uint64_t my_sq_index = my_sq_counter % sq_wqe_cnt;
|
||||
|
||||
while (true) {
|
||||
uint64_t db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t sunk = __hip_atomic_load(&sq_sunk, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
int64_t num_active_sq_entries = db_touched - sunk;
|
||||
if (num_active_sq_entries < 0) {
|
||||
continue;
|
||||
}
|
||||
uint64_t num_free_entries = min(sq_wqe_cnt, cq_cnt) - num_active_sq_entries;
|
||||
uint64_t num_entries_until_wave_last_entry = wave_sq_counter + num_active_lanes - db_touched;
|
||||
if (num_free_entries > num_entries_until_wave_last_entry) {
|
||||
break;
|
||||
}
|
||||
mlx5_quiet();
|
||||
}
|
||||
|
||||
outstanding_wqes[my_sq_counter % OUTSTANDING_TABLE_SIZE] = my_sq_counter;
|
||||
|
||||
SegmentBuilder seg_build(my_sq_index, sq_buf);
|
||||
seg_build.update_ctrl_seg(my_sq_counter, opcode, 0, qp_num, MLX5_WQE_CTRL_CQ_UPDATE, 3, 0, 0);
|
||||
seg_build.update_raddr_seg(raddr, rkey);
|
||||
|
||||
if (size <= inline_threshold && opcode == gda_op_rdma_write) {
|
||||
seg_build.update_inl_data_seg(laddr, size);
|
||||
} else {
|
||||
seg_build.update_data_seg(laddr, size, lkey);
|
||||
}
|
||||
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
if (is_leader) {
|
||||
uint64_t db_touched {0};
|
||||
do {
|
||||
db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (db_touched != wave_sq_counter);
|
||||
|
||||
uint8_t *base_ptr = reinterpret_cast<uint8_t*>(sq_buf);
|
||||
uint64_t* ctrl_wqe_8B_for_db = reinterpret_cast<uint64_t*>(&base_ptr[64 * ((wave_sq_counter + num_wqes - 1) % sq_wqe_cnt)]);
|
||||
mlx5_ring_doorbell(*ctrl_wqe_8B_for_db, wave_sq_counter + num_wqes);
|
||||
|
||||
__hip_atomic_fetch_add(&quiet_posted, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
__hip_atomic_store(&sq_db_touched, wave_sq_counter + num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
}
|
||||
#endif // GDA_MLX5
|
||||
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ uint64_t QueuePair::ionic_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode,
|
||||
int64_t atomic_data, int64_t atomic_cmp, bool fetching) {
|
||||
uint64_t activemask = get_same_qp_lane_mask();
|
||||
uint32_t num_wqes = get_active_lane_count(activemask);
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
uint32_t my_sq_prod = reserve_sq(activemask, num_wqes);
|
||||
uint32_t my_sq_pos = my_sq_prod + my_logical_lane_id;
|
||||
struct ionic_v1_wqe *wqe = &ionic_sq_buf[my_sq_pos & sq_mask];
|
||||
uint16_t wqe_flags = 0;
|
||||
uint32_t cons;
|
||||
|
||||
uint64_t* wave_fetch_atomic{nullptr};
|
||||
if (fetching) {
|
||||
if (is_leader) {
|
||||
auto res = fetching_atomic_freelist->pop_front();
|
||||
while (!res.success) {
|
||||
res = fetching_atomic_freelist->pop_front();
|
||||
}
|
||||
wave_fetch_atomic = res.value;
|
||||
}
|
||||
wave_fetch_atomic = (uint64_t*)__shfl((uint64_t)wave_fetch_atomic, leader_phys_lane_id);
|
||||
}
|
||||
|
||||
if (!(my_sq_pos & (sq_mask + 1))) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_COLOR);
|
||||
}
|
||||
|
||||
if (is_last_active_lane(activemask)) {
|
||||
wqe_flags |= swap_endian_val<uint16_t>(IONIC_V1_FLAG_SIG);
|
||||
}
|
||||
|
||||
wqe->base.wqe_idx = my_sq_pos;
|
||||
wqe->base.op = opcode;
|
||||
wqe->base.num_sge_key = 1;
|
||||
wqe->base.imm_data_key = swap_endian_val<uint32_t>(0);
|
||||
|
||||
wqe->atomic_v2.remote_va_high = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr) >> 32);
|
||||
wqe->atomic_v2.remote_va_low = swap_endian_val<uint32_t>(reinterpret_cast<uint64_t>(raddr));
|
||||
wqe->atomic_v2.remote_rkey = swap_endian_val<uint32_t>(rkey);
|
||||
wqe->atomic_v2.swap_add_high = swap_endian_val<uint32_t>(atomic_data >> 32);
|
||||
wqe->atomic_v2.swap_add_low = swap_endian_val<uint32_t>(atomic_data);
|
||||
wqe->atomic_v2.compare_high = swap_endian_val<uint32_t>(atomic_cmp >> 32);
|
||||
wqe->atomic_v2.compare_low = swap_endian_val<uint32_t>(atomic_cmp);
|
||||
|
||||
if (fetching) {
|
||||
wqe->atomic_v2.local_va = swap_endian_val<uint64_t>(reinterpret_cast<uint64_t>(wave_fetch_atomic + my_logical_lane_id));
|
||||
wqe->atomic_v2.lkey = swap_endian_val<uint32_t>(fetching_atomic_lkey);
|
||||
} else {
|
||||
wqe->atomic_v2.local_va = swap_endian_val<uint64_t>(reinterpret_cast<uint64_t>(nonfetching_atomic));
|
||||
wqe->atomic_v2.lkey = swap_endian_val<uint32_t>(nonfetching_atomic_lkey);
|
||||
}
|
||||
|
||||
__hip_atomic_store(&wqe->base.flags, wqe_flags, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT);
|
||||
|
||||
cons = commit_sq(activemask, my_sq_prod, my_sq_pos, num_wqes);
|
||||
|
||||
uint64_t ret{0};
|
||||
if (fetching) {
|
||||
ionic_quiet_internal(activemask, cons);
|
||||
ret = wave_fetch_atomic[my_logical_lane_id];
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
if (is_leader) {
|
||||
fetching_atomic_freelist->push_back(wave_fetch_atomic);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(GDA_MLX5)
|
||||
__device__ uint64_t QueuePair::mlx5_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode,
|
||||
int64_t atomic_data, int64_t atomic_cmp, bool fetching) {
|
||||
uint64_t activemask = get_active_lane_mask();
|
||||
uint8_t num_active_lanes = get_active_lane_count(activemask);
|
||||
uint8_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
bool is_leader{my_logical_lane_id == 0};
|
||||
const uint64_t leader_phys_lane_id = get_first_active_lane_id(activemask);
|
||||
uint8_t num_wqes{num_active_lanes};
|
||||
uint64_t wave_sq_counter{0};
|
||||
|
||||
if (is_leader) {
|
||||
wave_sq_counter = __hip_atomic_fetch_add(&sq_posted, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
wave_sq_counter = __shfl(wave_sq_counter, leader_phys_lane_id);
|
||||
uint64_t my_sq_counter = wave_sq_counter + my_logical_lane_id;
|
||||
uint64_t my_sq_index = my_sq_counter % sq_wqe_cnt;
|
||||
|
||||
while (true) {
|
||||
uint64_t db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
uint64_t sunk = __hip_atomic_load(&sq_sunk, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
int64_t num_active_sq_entries = db_touched - sunk;
|
||||
if (num_active_sq_entries < 0) {
|
||||
continue;
|
||||
}
|
||||
uint64_t num_free_entries = min(sq_wqe_cnt, cq_cnt) - num_active_sq_entries;
|
||||
uint64_t num_entries_until_wave_last_entry = wave_sq_counter + num_active_lanes - db_touched;
|
||||
if (num_free_entries > num_entries_until_wave_last_entry) {
|
||||
break;
|
||||
}
|
||||
mlx5_quiet();
|
||||
}
|
||||
|
||||
uint64_t* wave_fetch_atomic{nullptr};
|
||||
if (fetching) {
|
||||
if (is_leader) {
|
||||
uint64_t db_touched {0};
|
||||
do {
|
||||
db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (db_touched != wave_sq_counter);
|
||||
|
||||
auto res = fetching_atomic_freelist->pop_front();
|
||||
while (!res.success) {
|
||||
res = fetching_atomic_freelist->pop_front();
|
||||
}
|
||||
wave_fetch_atomic = res.value;
|
||||
}
|
||||
wave_fetch_atomic = (uint64_t*)__shfl((uint64_t)wave_fetch_atomic, leader_phys_lane_id);
|
||||
}
|
||||
|
||||
outstanding_wqes[my_sq_counter % OUTSTANDING_TABLE_SIZE] = my_sq_counter;
|
||||
|
||||
SegmentBuilder seg_build(my_sq_index, sq_buf);
|
||||
seg_build.update_ctrl_seg(my_sq_counter, opcode, 0, qp_num, MLX5_WQE_CTRL_CQ_UPDATE, 4, 0, 0);
|
||||
seg_build.update_raddr_seg(raddr, rkey);
|
||||
seg_build.update_atomic_seg(atomic_data, atomic_cmp);
|
||||
if (fetching) {
|
||||
seg_build.update_data_seg(wave_fetch_atomic + my_logical_lane_id, 8, fetching_atomic_lkey);
|
||||
} else {
|
||||
seg_build.update_data_seg(nonfetching_atomic, 8, nonfetching_atomic_lkey);
|
||||
}
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
|
||||
if (is_leader) {
|
||||
uint64_t db_touched {0};
|
||||
do {
|
||||
db_touched = __hip_atomic_load(&sq_db_touched, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
} while (db_touched != wave_sq_counter);
|
||||
|
||||
uint8_t *base_ptr = reinterpret_cast<uint8_t*>(sq_buf);
|
||||
uint64_t* ctrl_wqe_8B_for_db = reinterpret_cast<uint64_t*>(&base_ptr[64 * ((wave_sq_counter + num_wqes - 1) % sq_wqe_cnt)]);
|
||||
mlx5_ring_doorbell(*ctrl_wqe_8B_for_db, wave_sq_counter + num_wqes);
|
||||
|
||||
__hip_atomic_fetch_add(&quiet_posted, num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
__hip_atomic_store(&sq_db_touched, wave_sq_counter + num_wqes, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
|
||||
}
|
||||
|
||||
uint64_t ret{0};
|
||||
if (fetching) {
|
||||
mlx5_quiet();
|
||||
ret = wave_fetch_atomic[my_logical_lane_id];
|
||||
__atomic_signal_fence(__ATOMIC_SEQ_CST);
|
||||
if (is_leader) {
|
||||
fetching_atomic_freelist->push_back(wave_fetch_atomic);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif // GDA_MLX5
|
||||
|
||||
/******************************************************************************
|
||||
****************************** SHMEM INTERFACE *******************************
|
||||
*****************************************************************************/
|
||||
|
||||
@@ -163,14 +163,17 @@ class QueuePair {
|
||||
#if defined(GDA_MLX5)
|
||||
__device__ uint64_t mlx5_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode, int64_t atomic_data, int64_t atomic_cmp, bool fetch);
|
||||
__device__ void mlx5_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode);
|
||||
__device__ void mlx5_quiet();
|
||||
#endif
|
||||
#if defined(GDA_BNXT)
|
||||
__device__ uint64_t bnxt_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode, int64_t atomic_data, int64_t atomic_cmp, bool fetch);
|
||||
__device__ void bnxt_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode);
|
||||
__device__ void bnxt_quiet();
|
||||
#endif
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ uint64_t ionic_post_wqe_amo(int pe, int32_t size, uintptr_t *raddr, uint8_t opcode, int64_t atomic_data, int64_t atomic_cmp, bool fetch);
|
||||
__device__ void ionic_post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, uintptr_t *raddr, uint8_t opcode);
|
||||
__device__ void ionic_quiet();
|
||||
#endif
|
||||
|
||||
/**
|
||||
@@ -189,65 +192,6 @@ class QueuePair {
|
||||
__device__ void ionic_ring_doorbell(uint32_t pos);
|
||||
#endif
|
||||
|
||||
#ifdef GDA_IONIC
|
||||
__device__ uint64_t get_same_qp_lane_mask();
|
||||
|
||||
/**
|
||||
* @brief Reserve space in the sq to post this many wqes.
|
||||
* @param my_tid my logical thread id.
|
||||
* @param num_wqes number of sq wqes to reserve for this wave.
|
||||
* @return position of my_tid=0's wqe.
|
||||
*/
|
||||
__device__ uint32_t reserve_sq(uint64_t active_lane_mask, uint32_t num_wqes);
|
||||
|
||||
/**
|
||||
* @brief Ring the sq doorbell maintaining order between waves.
|
||||
* @param last this is the last wqe posted in this wave.
|
||||
* @param my_sq_prod position of my_tid=0's wqe.
|
||||
* @param num_wqes number of sq wqes posted in this wave.
|
||||
* @param wqe this thread's wqe.
|
||||
* @return doorbell producer index.
|
||||
*/
|
||||
__device__ uint32_t commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes);
|
||||
|
||||
/**
|
||||
* @brief Helper method to poll the next completion queue entry.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void poll_wave_cqes(uint64_t active_lane_mask);
|
||||
|
||||
/**
|
||||
* @brief Helper method to drain completion queue entries.
|
||||
* @param cons wait for sq_msn to catch up to this position.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void ionic_quiet_internal(uint64_t active_lane_mask, uint32_t cons);
|
||||
|
||||
uint64_t *cq_dbreg{nullptr};
|
||||
uint64_t cq_dbval{0};
|
||||
uint64_t cq_mask{0};
|
||||
struct ionic_v1_cqe *ionic_cq_buf{nullptr};
|
||||
uint32_t cq_lock{SPIN_LOCK_UNLOCKED};
|
||||
uint32_t cq_pos{0};
|
||||
uint32_t cq_dbpos{0};
|
||||
|
||||
uint64_t *sq_dbreg{nullptr};
|
||||
uint64_t sq_dbval{0};
|
||||
uint64_t sq_mask{0};
|
||||
struct ionic_v1_wqe *ionic_sq_buf{nullptr};
|
||||
uint32_t sq_lock{SPIN_LOCK_UNLOCKED};
|
||||
uint32_t sq_dbprod{0};
|
||||
uint32_t sq_prod{0};
|
||||
uint32_t sq_msn{0};
|
||||
#endif
|
||||
|
||||
#if defined(GDA_MLX5)
|
||||
__device__ void mlx5_quiet();
|
||||
#endif
|
||||
#if defined(GDA_BNXT)
|
||||
__device__ void bnxt_quiet();
|
||||
#endif
|
||||
#if defined(GDA_IONIC)
|
||||
__device__ void ionic_quiet();
|
||||
#endif
|
||||
int gda_vendor_{0};
|
||||
|
||||
/* GDAVendor::BNXT START */
|
||||
@@ -322,6 +266,58 @@ class QueuePair {
|
||||
|
||||
/* GDAVendor::MLX5 END */
|
||||
|
||||
/* GDAVendor::IONIC START */
|
||||
|
||||
uint64_t *cq_dbreg{nullptr};
|
||||
uint64_t cq_dbval{0};
|
||||
uint64_t cq_mask{0};
|
||||
struct ionic_v1_cqe *ionic_cq_buf{nullptr};
|
||||
uint32_t cq_lock{SPIN_LOCK_UNLOCKED};
|
||||
uint32_t cq_pos{0};
|
||||
uint32_t cq_dbpos{0};
|
||||
|
||||
uint64_t *sq_dbreg{nullptr};
|
||||
uint64_t sq_dbval{0};
|
||||
uint64_t sq_mask{0};
|
||||
struct ionic_v1_wqe *ionic_sq_buf{nullptr};
|
||||
uint32_t sq_lock{SPIN_LOCK_UNLOCKED};
|
||||
uint32_t sq_dbprod{0};
|
||||
uint32_t sq_prod{0};
|
||||
uint32_t sq_msn{0};
|
||||
|
||||
__device__ uint64_t get_same_qp_lane_mask();
|
||||
|
||||
/**
|
||||
* @brief Reserve space in the sq to post this many wqes.
|
||||
* @param my_tid my logical thread id.
|
||||
* @param num_wqes number of sq wqes to reserve for this wave.
|
||||
* @return position of my_tid=0's wqe.
|
||||
*/
|
||||
__device__ uint32_t reserve_sq(uint64_t active_lane_mask, uint32_t num_wqes);
|
||||
|
||||
/**
|
||||
* @brief Ring the sq doorbell maintaining order between waves.
|
||||
* @param last this is the last wqe posted in this wave.
|
||||
* @param my_sq_prod position of my_tid=0's wqe.
|
||||
* @param num_wqes number of sq wqes posted in this wave.
|
||||
* @param wqe this thread's wqe.
|
||||
* @return doorbell producer index.
|
||||
*/
|
||||
__device__ uint32_t commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes);
|
||||
|
||||
/**
|
||||
* @brief Helper method to poll the next completion queue entry.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void poll_wave_cqes(uint64_t active_lane_mask);
|
||||
|
||||
/**
|
||||
* @brief Helper method to drain completion queue entries.
|
||||
* @param cons wait for sq_msn to catch up to this position.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void ionic_quiet_internal(uint64_t active_lane_mask, uint32_t cons);
|
||||
|
||||
/* GDAVendor::IONIC END */
|
||||
|
||||
uint32_t inline_threshold{0};
|
||||
|
||||
char dev_name[24];
|
||||
|
||||
Ссылка в новой задаче
Block a user