diff --git a/cmake/FindIBVerbs.cmake b/cmake/FindIBVerbs.cmake index 6c4d631262..c295443a25 100644 --- a/cmake/FindIBVerbs.cmake +++ b/cmake/FindIBVerbs.cmake @@ -48,25 +48,25 @@ find_library(IBVerbs_PROVIDER_LIBRARY HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS} PATH_SUFFIXES lib lib64 ) -elseif (GDA_BNXT) -find_library(IBVerbs_PROVIDER_LIBRARY - NAMES bnxt_re libbnxt_re - HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS} - PATH_SUFFIXES lib lib64 -) -else() -find_library(IBVerbs_PROVIDER_LIBRARY - NAMES mlx5 libmlx5 - HINTS ${PC_IBVerbs_LIBDIR} ${PC_IBVerbs_LIBRARY_DIRS} - PATH_SUFFIXES lib lib64 -) -endif() find_package_handle_standard_args(IBVerbs DEFAULT_MSG IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR IBVerbs_PROVIDER_LIBRARY ) mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR IBVerbs_PROVIDER_LIBRARY) +add_library(IBVerbs::verbs_provider UNKNOWN IMPORTED) +set_target_properties(IBVerbs::verbs_provider PROPERTIES + IMPORTED_LOCATION "${IBVerbs_PROVIDER_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_PROVIDER_INCLUDE_DIR}" +) +target_link_libraries(IBVerbs::verbs IBVerbs::verbs_provider) +endif() + +find_package_handle_standard_args(IBVerbs DEFAULT_MSG + IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR +) +mark_as_advanced(IBVerbs_LIBRARY IBVerbs_INCLUDE_DIR) + if (IBVerbs_FOUND) add_library(IBVerbs::verbs UNKNOWN IMPORTED) set_target_properties(IBVerbs::verbs PROPERTIES @@ -74,10 +74,7 @@ set_target_properties(IBVerbs::verbs PROPERTIES INTERFACE_COMPILE_OPTIONS "${PC_IBVerbs_CFLAGS_OTHER}" INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_INCLUDE_DIR}" ) -add_library(IBVerbs::verbs_provider UNKNOWN IMPORTED) -set_target_properties(IBVerbs::verbs_provider PROPERTIES - IMPORTED_LOCATION "${IBVerbs_PROVIDER_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${IBVerbs_PROVIDER_INCLUDE_DIR}" -) -target_link_libraries(IBVerbs::verbs INTERFACE IBVerbs::verbs_provider) + +target_link_libraries(IBVerbs::verbs INTERFACE) + endif() diff --git a/src/gda/CMakeLists.txt b/src/gda/CMakeLists.txt index 70480ecc43..1d8234297c 100644 --- a/src/gda/CMakeLists.txt +++ b/src/gda/CMakeLists.txt @@ -35,6 +35,7 @@ target_sources( gda_team.cpp queue_pair.cpp endian.cpp + segment_builder.cpp topology.cpp ) @@ -47,10 +48,4 @@ target_link_libraries( numa ) -if (GDA_BNXT) - add_subdirectory(bnxt) -endif() - -if (GDA_MLX5) - target_sources(${PROJECT_NAME} PRIVATE segment_builder.cpp) -endif() +add_subdirectory(bnxt) diff --git a/src/gda/backend_gda.cpp b/src/gda/backend_gda.cpp index a467925b19..17c152c1fb 100644 --- a/src/gda/backend_gda.cpp +++ b/src/gda/backend_gda.cpp @@ -70,29 +70,8 @@ GDABackend::GDABackend(TcpBootstrap *bootstrap): Backend(bootstrap) { } void GDABackend::init() { - type = BackendType::GDA_BACKEND; - int ret; -#if defined(GDA_BNXT) - ret = bnxt_dv_dl_init(); - if (ret != ROCSHMEM_SUCCESS) { - // Disable BNXT GDA support. - DPRINTF("Initializing rocSHMEM BNXT GDA support failed\n"); - // We abort for now, but might remove that once we support - // multiple NIC types in the same build - abort(); - } -#endif -#if defined(GDA_MLX5) - ret = mlx5_dv_dl_init(); - if (ret != ROCSHMEM_SUCCESS) { - // Disable MLX5 GDA support. - DPRINTF("Initializing rocSHMEM MLX5 GDA support failed\n"); - // We abort for now, but might remove that once we support - // multiple NIC types in the same build - abort(); - } -#endif + type = BackendType::GDA_BACKEND; read_env(); @@ -141,14 +120,11 @@ GDABackend::~GDABackend() { cleanup_heap_memory_rkey(); cleanup_ibv(); -#if defined(GDA_BNXT) if (bnxtdv_handle_ != nullptr) dlclose(bnxtdv_handle_); -#endif -#if defined(GDA_MLX5) + if (mlx5dv_handle_ != nullptr) dlclose(mlx5dv_handle_); -#endif } void GDABackend::read_env() { @@ -562,20 +538,20 @@ void GDABackend::rte_barrier() { } } -#if defined(GDA_MLX5) int GDABackend::mlx5_dv_dl_init () { mlx5dv_handle_ = dlopen("libmlx5.so", RTLD_NOW); if (!mlx5dv_handle_) { - printf("Could not open libmlx5.so. Returning\n"); + DPRINTF("Could not open libmlx5.so. Returning\n"); return ROCSHMEM_ERROR; } DLSYM_HELPER(mlx5dv_ftable_, mlx5dv_, mlx5dv_handle_, init_obj); return ROCSHMEM_SUCCESS; } -#endif void GDABackend::setup_ibv() { + autodetect_dv_libs(); + open_ib_device(); create_queues(); @@ -592,50 +568,50 @@ void GDABackend::setup_ibv() { void GDABackend::cleanup_ibv() { int err; -#ifdef GDA_BNXT - CHECK_HIP(hipHostUnregister(db_region_attr.dbr)); + if (gda_vendor == GDAVendor::BNXT) { + CHECK_HIP(hipHostUnregister(db_region_attr.dbr)); - for (int i = 0; i < qps.size(); i++) { - err = bnxtdv_ftable_.destroy_qp(qps[i]); - CHECK_ZERO(err, "bnxt_re_dv_destroy_qp"); + for (int i = 0; i < qps.size(); i++) { + err = bnxtdv_ftable_.destroy_qp(qps[i]); + CHECK_ZERO(err, "bnxt_re_dv_destroy_qp"); - err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.rq_umem_handle); - CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)"); + err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.rq_umem_handle); + CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)"); - err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.sq_umem_handle); - CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (SQ)"); + err = bnxtdv_ftable_.umem_dereg(bnxt_qps[i].attr.sq_umem_handle); + CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (SQ)"); - CHECK_HIP(hipFree(bnxt_qps[i].sq_buf)); - CHECK_HIP(hipFree(bnxt_qps[i].rq_buf)); + CHECK_HIP(hipFree(bnxt_qps[i].sq_buf)); + CHECK_HIP(hipFree(bnxt_qps[i].rq_buf)); - err = bnxtdv_ftable_.destroy_cq(cqs[i]); - CHECK_ZERO(err, "bnxt_re_dv_destroy_cq"); + err = bnxtdv_ftable_.destroy_cq(cqs[i]); + CHECK_ZERO(err, "bnxt_re_dv_destroy_cq"); - err = bnxtdv_ftable_.umem_dereg(bnxt_cqs[i].umem_handle); - CHECK_ZERO(err, "bnxt_re_dv_umem_dereg"); + err = bnxtdv_ftable_.umem_dereg(bnxt_cqs[i].umem_handle); + CHECK_ZERO(err, "bnxt_re_dv_umem_dereg"); - CHECK_HIP(hipFree(bnxt_cqs[i].buf)); + CHECK_HIP(hipFree(bnxt_cqs[i].buf)); + } + } else { + for (int i = 0; i < qps.size(); i++) { + err = ibv_destroy_qp(qps[i]); + CHECK_ZERO(err, "ibv_destroy_qp"); + + err = ibv_destroy_cq(cqs[i]); + CHECK_ZERO(err, "ibv_destroy_cqs"); + } + + if (gda_vendor == GDAVendor::IONIC) { + err = ibv_dealloc_pd(pd_uxdma[0]); + CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[0])"); + + err = ibv_dealloc_pd(pd_uxdma[1]); + CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[1])"); + } + + err = ibv_dealloc_pd(pd_parent); + CHECK_ZERO(err, "ibv_dealloc_pd (pd_parent)"); } -#else - for (int i = 0; i < qps.size(); i++) { - err = ibv_destroy_qp(qps[i]); - CHECK_ZERO(err, "ibv_destroy_qp"); - - err = ibv_destroy_cq(cqs[i]); - CHECK_ZERO(err, "ibv_destroy_cqs"); - } - -#ifdef GDA_IONIC - err = ibv_dealloc_pd(pd_uxdma[0]); - CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[0])"); - - err = ibv_dealloc_pd(pd_uxdma[1]); - CHECK_ZERO(err, "ibv_dealloc_pd (uxdma[1])"); -#endif - - err = ibv_dealloc_pd(pd_parent); - CHECK_ZERO(err, "ibv_dealloc_pd (pd_parent)"); -#endif err = ibv_dealloc_pd(pd_orig); CHECK_ZERO(err, "ibv_dealloc_pd (pd_orig)"); @@ -644,6 +620,40 @@ void GDABackend::cleanup_ibv() { CHECK_ZERO(err, "ibv_close_device"); } +void GDABackend::autodetect_dv_libs() { + int ret; + +#ifdef GDA_IONIC + gda_vendor = GDAVendor::IONIC; +#endif + + if (gda_vendor == GDAVendor::NONE) { + ret = bnxt_dv_dl_init(); + + if (ret == ROCSHMEM_SUCCESS) { + gda_vendor = GDAVendor::BNXT; + } else { + DPRINTF("Initializing rocSHMEM BNXT GDA support failed\n"); + } + } + + if (gda_vendor == GDAVendor::NONE) { + ret = mlx5_dv_dl_init(); + + if (ret == ROCSHMEM_SUCCESS) { + gda_vendor = GDAVendor::MLX5; + } else { + DPRINTF("Initializing rocSHMEM MLX5 GDA support failed\n"); + } + } + + if (gda_vendor == GDAVendor::NONE) { + printf("Initializing rocSHMEM with IONIC, BNXT, or MLX5 GDA support failed\n"); + abort(); + } +} + + void GDABackend::exchange_qp_dest_info() { for (int i = 0; i < qps.size(); i++) { dest_info[i].lid = portinfo.lid; @@ -712,9 +722,14 @@ void GDABackend::setup_gpu_qps() { CHECK_NNULL(host_qps, "malloc (host_qps)"); for (int i = 0; i < qp_objs_count; i++) { - new (&host_qps[i]) QueuePair(pd_orig); + new (&host_qps[i]) QueuePair(pd_orig, gda_vendor); CHECK_HIP(hipMemcpy(&gpu_qps[i], &host_qps[i], sizeof(QueuePair), hipMemcpyDefault)); - initialize_gpu_qp(&gpu_qps[i], i); + + if (gda_vendor == GDAVendor::BNXT) { + bnxt_initialize_gpu_qp(&gpu_qps[i], i); + } else { + initialize_gpu_qp(&gpu_qps[i], i); + } } } @@ -766,9 +781,9 @@ void GDABackend::open_ib_device() { CHECK_NNULL(pd_orig, "ib allocate pd"); dump_ibv_pd(pd_orig); -#ifndef GDA_BNXT - create_parent_domain(); -#endif + if (gda_vendor == GDAVendor::IONIC || gda_vendor == GDAVendor::MLX5) { + create_parent_domain(); + } err = ibv_query_port(context, port, &portinfo); CHECK_ZERO(err, "ibv_query_port"); @@ -801,11 +816,11 @@ void GDABackend::modify_qps_reset_to_init() { | IBV_QP_ACCESS_FLAGS; for (int i =0; i < qps.size() ; i++) { -#ifdef GDA_BNXT - err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0); -#else - err = ibv_modify_qp(qps[i], &attr, attr_mask); -#endif + if (gda_vendor == GDAVendor::BNXT) { + err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0); + } else { + err = ibv_modify_qp(qps[i], &attr, attr_mask); + } CHECK_ZERO(err, "modify_qp (INIT)"); } } @@ -818,10 +833,15 @@ void GDABackend::modify_qps_init_to_rtr() { memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; attr.path_mtu = portinfo.active_mtu; - attr.max_dest_rd_atomic = GDA_MAX_ATOMIC; attr.min_rnr_timer = 12; attr.ah_attr.port_num = port; + if (gda_vendor == GDAVendor::IONIC) { + attr.max_dest_rd_atomic = 15; + } else { + attr.max_dest_rd_atomic = 1; + } + if (portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) { attr.ah_attr.grh.sgid_index = gid_index; attr.ah_attr.is_global = 1; @@ -847,11 +867,11 @@ void GDABackend::modify_qps_init_to_rtr() { attr.ah_attr.dlid = dest_info[i].lid; } -#ifdef GDA_BNXT - err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0); -#else - err = ibv_modify_qp(qps[i], &attr, attr_mask); -#endif + if (gda_vendor == GDAVendor::BNXT) { + err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0); + } else { + err = ibv_modify_qp(qps[i], &attr, attr_mask); + } CHECK_ZERO(err, "modify_qp (RTR)"); } } @@ -863,11 +883,16 @@ void GDABackend::modify_qps_rtr_to_rts() { memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_RTS; - attr.max_rd_atomic = GDA_MAX_ATOMIC; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; + if (gda_vendor == GDAVendor::IONIC) { + attr.max_dest_rd_atomic = 15; + } else { + attr.max_dest_rd_atomic = 1; + } + attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC @@ -878,11 +903,11 @@ void GDABackend::modify_qps_rtr_to_rts() { for (int i = 0; i < qps.size(); i++) { attr.sq_psn = dest_info[i].psn; -#ifdef GDA_BNXT - err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0); -#else - err = ibv_modify_qp(qps[i], &attr, attr_mask); -#endif + if (gda_vendor == GDAVendor::BNXT) { + err = bnxtdv_ftable_.modify_qp(qps[i], &attr, attr_mask, 0, 0); + } else { + err = ibv_modify_qp(qps[i], &attr, attr_mask); + } CHECK_ZERO(err, "modify_qp (RTS)"); } } @@ -891,11 +916,11 @@ void GDABackend::create_queues() { int ncqes; int resize_length; -#ifdef GDA_IONIC - ncqes = sq_size << 1; -#else - ncqes = sq_size; -#endif + if (gda_vendor == GDAVendor::IONIC) { + ncqes = sq_size << 1; + } else { + ncqes = sq_size; + } resize_length = (maximum_num_contexts_ + 1) * num_pes; @@ -903,24 +928,28 @@ void GDABackend::create_queues() { cqs.resize(resize_length); qps.resize(resize_length); -#ifdef GDA_BNXT bnxt_cqs.resize(resize_length); bnxt_qps.resize(resize_length); -#endif - create_cqs(ncqes); - create_qps(sq_size); + if (gda_vendor == GDAVendor::BNXT) { + bnxt_create_cqs(ncqes); + bnxt_create_qps(sq_size); + } else { + create_cqs(ncqes); + create_qps(sq_size); + } } -#ifndef GDA_BNXT -void* GDABackend::pd_alloc(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) { +void* GDABackend::pd_alloc_device_uncached(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) { void* dev_ptr{nullptr}; - //TODO make this configurable, presumably we want it on device for all types? -#ifdef GDA_IONIC CHECK_HIP(hipExtMallocWithFlags(reinterpret_cast(&dev_ptr), size, hipDeviceMallocUncached)); -#else + memset(dev_ptr, 0, size); + return dev_ptr; +} + +void* GDABackend::pd_alloc_host(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) { + void* dev_ptr{nullptr}; CHECK_HIP(hipHostMalloc(reinterpret_cast(&dev_ptr), size, hipHostMallocDefault)); -#endif memset(dev_ptr, 0, size); return dev_ptr; } @@ -933,12 +962,17 @@ void GDABackend::create_parent_domain() { struct ibv_parent_domain_init_attr pattr; memset(&pattr, 0, sizeof(struct ibv_parent_domain_init_attr)); - pattr.pd = pd_orig, + pattr.pd = pd_orig; pattr.td = nullptr, - pattr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_ALLOCATORS, - pattr.alloc = GDABackend::pd_alloc, - pattr.free = GDABackend::pd_release, - pattr.pd_context = nullptr, + pattr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_ALLOCATORS; + pattr.free = GDABackend::pd_release; + pattr.pd_context = nullptr; + + if (gda_vendor == GDAVendor::IONIC) { + pattr.alloc = GDABackend::pd_alloc_device_uncached; + } else { + pattr.alloc = GDABackend::pd_alloc_host; + } pd_parent = ibv_alloc_parent_domain(context, &pattr); CHECK_NNULL(pd_parent, "ibv_alloc_parent_domain"); @@ -956,7 +990,7 @@ void GDABackend::create_parent_domain() { ionic_dv_pd_set_rqcmb(pd_uxdma[uxdma_i], false, false, false); ionic_dv_pd_set_udma_mask(pd_uxdma[uxdma_i], 1u << uxdma_i); } -#endif +#endif /* GDA_IONIC */ } void GDABackend::create_cqs(int cqe) { @@ -973,9 +1007,9 @@ void GDABackend::create_cqs(int cqe) { cq_attr.parent_domain = pd_parent; for (int i = 0; i < qps.size(); i++) { -#ifdef GDA_IONIC - cq_attr.parent_domain = pd_uxdma[((i + 1) / 2) & 1]; -#endif + if (gda_vendor == GDAVendor::IONIC) { + cq_attr.parent_domain = pd_uxdma[((i + 1) / 2) & 1]; + } cq_ex = ibv_create_cq_ex(context, &cq_attr); CHECK_NNULL(cq_ex, "ibv_create_cq_ex"); @@ -1028,76 +1062,77 @@ void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) { gpu_qp->lkey = heap_mr->lkey; gpu_qp->rkey = heap_rkey[conn_num % num_pes]; gpu_qp->inline_threshold = 32; -#else // !GDA_IONIC - mlx5dv_cq cq_out; - mlx5dv_obj mlx_obj; - mlx_obj.cq.in = cqs[conn_num]; - mlx_obj.cq.out = &cq_out; - mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_CQ); - dump_mlx5dv_cq(&cq_out, conn_num); +#endif /* GDA_IONIC */ + if (gda_vendor == GDAVendor::MLX5) { + mlx5dv_cq cq_out; + mlx5dv_obj mlx_obj; + mlx_obj.cq.in = cqs[conn_num]; + mlx_obj.cq.out = &cq_out; + mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_CQ); + dump_mlx5dv_cq(&cq_out, conn_num); - /* - * struct mlx5dv_cq { - * void *buf; - * __be32 *dbrec; - * uint32_t cqe_cnt; - * uint32_t cqe_size; - * void *cq_uar; - * uint32_t cqn; - * uint64_t comp_mask; - * }; - */ + /* + * struct mlx5dv_cq { + * void *buf; + * __be32 *dbrec; + * uint32_t cqe_cnt; + * uint32_t cqe_size; + * void *cq_uar; + * uint32_t cqn; + * uint64_t comp_mask; + * }; + */ - gpu_qp->cq_buf = reinterpret_cast(cq_out.buf); - gpu_qp->cq_cnt = cq_out.cqe_cnt; - gpu_qp->cq_log_cnt = log2(cq_out.cqe_cnt); - gpu_qp->cq_dbrec = cq_out.dbrec; + gpu_qp->cq_buf = reinterpret_cast(cq_out.buf); + gpu_qp->cq_cnt = cq_out.cqe_cnt; + gpu_qp->cq_log_cnt = log2(cq_out.cqe_cnt); + gpu_qp->cq_dbrec = cq_out.dbrec; - mlx5dv_qp qp_out; - mlx_obj.qp.in = qps[conn_num]; - mlx_obj.qp.out = &qp_out; - mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_QP); - dump_mlx5dv_qp(&qp_out, conn_num); + mlx5dv_qp qp_out; + mlx_obj.qp.in = qps[conn_num]; + mlx_obj.qp.out = &qp_out; + mlx5dv_ftable_.init_obj(&mlx_obj, MLX5DV_OBJ_QP); + dump_mlx5dv_qp(&qp_out, conn_num); - /* - * struct mlx5dv_qp { - * __be32 *dbrec; - * struct { - * void *buf; - * uint32_t wqe_cnt; - * uint32_t stride; - * } sq; - * struct { - * void *buf; - * uint32_t wqe_cnt; - * uint32_t stride; - * } rq; - * struct { - * void *reg; - * uint32_t size; - * } bf; - * uint64_t comp_mask; - * off_t uar_mmap_offset; - * uint32_t tirn; - * uint32_t tisn; - * uint32_t rqn; - * uint32_t sqn; - * uint64_t tir_icm_addr; - * }; - */ + /* + * struct mlx5dv_qp { + * __be32 *dbrec; + * struct { + * void *buf; + * uint32_t wqe_cnt; + * uint32_t stride; + * } sq; + * struct { + * void *buf; + * uint32_t wqe_cnt; + * uint32_t stride; + * } rq; + * struct { + * void *reg; + * uint32_t size; + * } bf; + * uint64_t comp_mask; + * off_t uar_mmap_offset; + * uint32_t tirn; + * uint32_t tisn; + * uint32_t rqn; + * uint32_t sqn; + * uint64_t tir_icm_addr; + * }; + */ - gpu_qp->dbrec = &qp_out.dbrec[1]; // points to two pointers: 0 -> MLX5_REC_DBR, 1 -> MLX5_SND_DBR - gpu_qp->sq_buf = reinterpret_cast(qp_out.sq.buf); - gpu_qp->sq_wqe_cnt = qp_out.sq.wqe_cnt; - gpu_qp->rkey = htobe32(heap_rkey[conn_num % num_pes]); - gpu_qp->lkey = htobe32(heap_mr->lkey); - gpu_qp->qp_num = qps[conn_num]->qp_num; - gpu_qp->inline_threshold = inline_threshold; - // The 2 in qp_out.bf.size * 2 below facilitates the switching between blue flame registers - void* gpu_ptr{nullptr}; - rocm_memory_lock_to_fine_grain(qp_out.bf.reg, qp_out.bf.size * 2, &gpu_ptr, hip_dev_id); - gpu_qp->db.ptr = reinterpret_cast(gpu_ptr); -#endif // !GDA_IONIC + gpu_qp->dbrec = &qp_out.dbrec[1]; // points to two pointers: 0 -> MLX5_REC_DBR, 1 -> MLX5_SND_DBR + gpu_qp->sq_buf = reinterpret_cast(qp_out.sq.buf); + gpu_qp->sq_wqe_cnt = qp_out.sq.wqe_cnt; + gpu_qp->rkey = htobe32(heap_rkey[conn_num % num_pes]); + gpu_qp->lkey = htobe32(heap_mr->lkey); + gpu_qp->qp_num = qps[conn_num]->qp_num; + gpu_qp->inline_threshold = inline_threshold; + // The 2 in qp_out.bf.size * 2 below facilitates the switching between blue flame registers + void* gpu_ptr{nullptr}; + rocm_memory_lock_to_fine_grain(qp_out.bf.reg, qp_out.bf.size * 2, &gpu_ptr, hip_dev_id); + gpu_qp->db.ptr = reinterpret_cast(gpu_ptr); + } } void GDABackend::create_qps(int sq_length) { @@ -1107,18 +1142,19 @@ void GDABackend::create_qps(int sq_length) { attr.cap.max_send_wr = sq_length; attr.cap.max_send_sge = 1; attr.cap.max_inline_data = inline_threshold; -#ifdef GDA_IONIC - attr.cap.max_recv_sge = 1; // TODO allow zero sges in the driver -#endif attr.sq_sig_all = 0; attr.qp_type = IBV_QPT_RC; attr.comp_mask = IBV_QP_INIT_ATTR_PD; attr.pd = pd_parent; + if (gda_vendor == GDAVendor::IONIC) { + attr.cap.max_recv_sge = 1; // TODO allow zero sges in the driver + } + for (int i = 0; i < qps.size(); i++) { -#ifdef GDA_IONIC - attr.pd = pd_uxdma[((i + 1) / 2) & 1]; -#endif + if (gda_vendor == GDAVendor::IONIC) { + attr.pd = pd_uxdma[((i + 1) / 2) & 1]; + } attr.send_cq = cqs[i]; attr.recv_cq = cqs[i]; @@ -1126,7 +1162,6 @@ void GDABackend::create_qps(int sq_length) { CHECK_NNULL(qps[i], "ibv_create_qp_ex"); } } -#endif void GDABackend::select_gid_index() { struct ibv_gid_entry *gid_entries; diff --git a/src/gda/backend_gda.hpp b/src/gda/backend_gda.hpp index 56313a0ac8..aa8b42eccb 100644 --- a/src/gda/backend_gda.hpp +++ b/src/gda/backend_gda.hpp @@ -36,9 +36,9 @@ #include "queue_pair.hpp" #include "bootstrap/bootstrap.hpp" #include "debug_gda.hpp" - -#ifdef GDA_BNXT -#include +#include "gda/ionic/provider_gda_ionic.hpp" +#include "gda/bnxt/provider_gda_bnxt.hpp" +#include "gda/mlx5/provider_gda_mlx5.hpp" struct bnxtdv_funcs_t { int (*init_obj)(struct bnxt_re_dv_obj *obj, uint64_t obj_type); @@ -61,15 +61,10 @@ struct bnxtdv_funcs_t { int (*get_default_db_region)(struct ibv_context *ibvctx, struct bnxt_re_dv_db_region_attr *out); }; -#endif /* GDA_BNXT */ - -#ifdef GDA_MLX5 -#include struct mlx5dv_funcs_t { int (*init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type); }; -#endif /* GDA_MLX5 */ /* Helper Macros for handling dynamic libraries */ #define PPCAT_NX(prefix, func_name) prefix##func_name @@ -96,6 +91,13 @@ class GDAHostContext; class QueuePair; class HostInterface; +enum GDAVendor { + NONE, + IONIC, + BNXT, + MLX5 +}; + class GDABackend : public Backend { private: typedef struct dest_info { @@ -108,6 +110,7 @@ class GDABackend : public Backend { char *requested_dev = nullptr; struct ibv_context *context = nullptr;; struct ibv_pd *pd_orig = nullptr; + enum GDAVendor gda_vendor = GDAVendor::NONE; struct ibv_port_attr portinfo; union ibv_gid gid; @@ -125,21 +128,23 @@ class GDABackend : public Backend { std::vector cqs; std::vector dest_info; -#ifdef GDA_BNXT + /* GDA_BNXT START */ std::vector bnxt_qps; std::vector bnxt_cqs; struct bnxt_re_dv_db_region_attr db_region_attr; -#else - struct ibv_pd *pd_parent = nullptr; -#endif + /* GDA_BNXT END */ -#ifdef GDA_IONIC + /* GDA_IONIC & GDA_MLX5 START */ + struct ibv_pd *pd_parent = nullptr; + /* GDA_IONIC & GDA_MLX5 END */ + + /* GDA_IONIC START */ struct ibv_pd *pd_uxdma[2]; void *gpu_db_page = nullptr; uint64_t *gpu_db_cq = nullptr; uint64_t *gpu_db_sq = nullptr; -#endif + /* GDA_IONIC END */ /** * @brief Common code invoked from the different constructors @@ -327,6 +332,7 @@ class GDABackend : public Backend { void cleanup_heap_memory_rkey(); void initialize_gpu_qp(QueuePair* qp, int conn_num); + void bnxt_initialize_gpu_qp(QueuePair* qp, int conn_num); /** * @brief Setup InfiniBand Resources @@ -338,6 +344,11 @@ class GDABackend : public Backend { */ void cleanup_ibv(); + /** + * @brief Detect the available direct verbs libraries + */ + void autodetect_dv_libs(); + /** * @brief Open InfiniBand Device and create common structures */ @@ -357,11 +368,13 @@ class GDABackend : public Backend { * @brief Create all CQs with a of length ncqes */ void create_cqs(int ncqes); + void bnxt_create_cqs(int ncqes); /** * @brief Create all QPs with a SQ of length sq_length */ void create_qps(int sq_length); + void bnxt_create_qps(int sq_length); /** * @brief Exchange QP information for connection @@ -388,13 +401,12 @@ class GDABackend : public Backend { */ int ibv_mtu_to_int(enum ibv_mtu mtu); -#ifndef GDA_BNXT - static void* pd_alloc(ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type); + static void* pd_alloc_host(ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type); + static void* pd_alloc_device_uncached(ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type); static void pd_release(ibv_pd* pd, void* pd_context, void* ptr, uint64_t resource_type); void create_parent_domain(); -#endif void setup_gpu_qps(); void cleanup_gpu_qps(); @@ -501,7 +513,6 @@ class GDABackend : public Backend { */ void rte_barrier(); -#ifdef GDA_BNXT /** * @brief structures holding the function pointers to the direct verbs functionality * of each network driver. @@ -517,9 +528,7 @@ class GDABackend : public Backend { * @brief initialize function table for BCOM direct verbs support */ int bnxt_dv_dl_init(); -#endif -#ifdef GDA_MLX5 /** * @brief structures holding the function pointers to the direct verbs functionality * of each network driver. @@ -535,7 +544,6 @@ class GDABackend : public Backend { * @brief initialize function table for MLNX direct verbs support */ int mlx5_dv_dl_init(); -#endif }; } // namespace rocshmem diff --git a/src/gda/bnxt/CMakeLists.txt b/src/gda/bnxt/CMakeLists.txt index 6db1d7904f..686c83f282 100644 --- a/src/gda/bnxt/CMakeLists.txt +++ b/src/gda/bnxt/CMakeLists.txt @@ -25,5 +25,12 @@ target_sources( ${PROJECT_NAME} PRIVATE backend_gda_bnxt.cpp +) + +if(GDA_BNXT) +target_sources( + ${PROJECT_NAME} + PRIVATE queue_pair_bnxt.cpp ) +endif() diff --git a/src/gda/bnxt/backend_gda_bnxt.cpp b/src/gda/bnxt/backend_gda_bnxt.cpp index e01e7d8040..46fc5d87e9 100644 --- a/src/gda/bnxt/backend_gda_bnxt.cpp +++ b/src/gda/bnxt/backend_gda_bnxt.cpp @@ -28,7 +28,7 @@ namespace rocshmem { -void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) { +void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) { struct bnxt_re_dv_obj dv_obj; struct bnxt_re_dv_cq dv_cq; struct bnxt_re_dv_qp dv_qp; @@ -90,7 +90,7 @@ void GDABackend::initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) { gpu_qp->inline_threshold = inline_threshold; } -void GDABackend::create_cqs(int cqe) { +void GDABackend::bnxt_create_cqs(int cqe) { struct bnxt_re_dv_cq_attr cq_attr; struct bnxt_re_dv_cq_init_attr cq_init_attr; struct bnxt_re_dv_umem_reg_attr umem_attr; @@ -126,7 +126,7 @@ void GDABackend::create_cqs(int cqe) { } } -void GDABackend::create_qps(int sq_length) { +void GDABackend::bnxt_create_qps(int sq_length) { struct ibv_qp_init_attr ib_qp_attr; struct bnxt_re_dv_umem_reg_attr umem_attr; void *sq_ptr; @@ -246,4 +246,3 @@ int GDABackend::bnxt_dv_dl_init() { } } // namespace rocshmem - diff --git a/src/gda/bnxt/bnxt_re_dv.h b/src/gda/bnxt/bnxt_re_dv.h new file mode 100644 index 0000000000..39ebf7bf1d --- /dev/null +++ b/src/gda/bnxt/bnxt_re_dv.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2025, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Direct verb support user interface header + */ + +#ifndef __BNXT_RE_DV_H__ +#define __BNXT_RE_DV_H__ + +#include +#include +#ifdef __cplusplus +extern "C" { +#endif + +struct bnxt_re_dv_qp { + uint64_t wqe_cnt; + uint64_t comp_mask; +}; + +struct bnxt_re_dv_cq { + uint32_t cqn; + uint32_t cqe_size; + uint64_t comp_mask; +}; + +struct bnxt_re_dv_srq { + uint32_t srqn; + uint64_t comp_mask; +}; + +struct bnxt_re_dv_ah { + uint32_t avid; + uint64_t comp_mask; +}; + +struct bnxt_re_dv_pd { + uint32_t pdn; + uint64_t comp_mask; +}; + +struct bnxt_re_dv_obj { + struct { + struct ibv_qp *in; + struct bnxt_re_dv_qp *out; + } qp; + struct { + struct ibv_cq *in; + struct bnxt_re_dv_cq *out; + } cq; + struct { + struct ibv_srq *in; + struct bnxt_re_dv_srq *out; + } srq; + struct { + struct ibv_ah *in; + struct bnxt_re_dv_ah *out; + } ah; + struct { + struct ibv_pd *in; + struct bnxt_re_dv_pd *out; + } pd; +}; + +int bnxt_re_dv_init_obj(struct bnxt_re_dv_obj *obj, uint64_t obj_type); + +enum bnxt_re_dv_obj_type { + BNXT_RE_DV_OBJ_QP = 1 << 0, + BNXT_RE_DV_OBJ_CQ = 1 << 1, + BNXT_RE_DV_OBJ_SRQ = 1 << 2, + BNXT_RE_DV_OBJ_AH = 1 << 3, + BNXT_RE_DV_OBJ_PD = 1 << 4, +}; + +int bnxt_re_dv_modify_qp_udp_sport(struct ibv_qp *qp, uint16_t udp_sport); + +struct bnxt_re_dv_db_region_attr { + uint32_t handle; + uint32_t dpi; + uint64_t umdbr; + __u64 *dbr; +}; + +#ifdef EXPERIMENTAL_APIS +struct bnxt_re_dv_db_region_attr * +bnxt_re_dv_alloc_db_region(struct ibv_context *ctx); +int bnxt_re_dv_free_db_region(struct ibv_context *ctx, + struct bnxt_re_dv_db_region_attr *attr); +#endif + +int bnxt_re_dv_get_default_db_region(struct ibv_context *ibvctx, + struct bnxt_re_dv_db_region_attr *out); + +enum bnxt_re_dv_umem_in_flags { + BNXT_RE_DV_UMEM_FLAGS_DMABUF = 1 << 0, +}; + +struct bnxt_re_dv_umem_reg_attr { + void *addr; + size_t size; + uint32_t access_flags; + uint64_t pgsz_bitmap; + uint64_t comp_mask; + int dmabuf_fd; +}; + +struct bnxt_re_dv_cq_init_attr { + uint64_t cq_handle; + void *umem_handle; /* umem_handle from umem_reg */ + uint64_t cq_umem_offset; /* offset into umem */ + uint32_t ncqe; +}; + +struct bnxt_re_dv_cq_attr { + uint32_t ncqe; /* no. of entries */ + uint32_t cqe_size; /* size of entries */ +}; + +struct bnxt_re_dv_qp_init_attr { + /* Standard ibv params */ + enum ibv_qp_type qp_type; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + + /* DV params */ + uint64_t qp_handle; /* to match with cqe */ + void *dbr_handle; /* dbr_handle from alloc_dbr */ + void *sq_umem_handle; /* umem_handle from umem_reg */ + uint64_t sq_umem_offset; /* offset into umem */ + uint32_t sq_len; /* sq length including MSN area */ + uint32_t sq_slots; /* sq length in slots */ + void *rq_umem_handle; /* umem_handle from umem_reg */ + uint64_t rq_umem_offset; /* offset into umem */ + uint32_t sq_wqe_sz; /* sq wqe size */ + uint32_t sq_psn_sz; /* sq psn size */ + uint32_t sq_npsn; /* sq num psn entries */ + uint32_t rq_len; /* rq length */ + uint32_t rq_slots; /* rq length in slots */ + uint32_t rq_wqe_sz; /* rq wqe size */ + uint64_t comp_mask; /* compatibility mask for future updates */ +}; + +struct bnxt_re_dv_qp_mem_info { + uint64_t qp_handle; /* to match with cqe */ + uint64_t sq_va; /* Peer-mem sq-va (not dma mapped) */ + uint32_t sq_len; /* sq length including MSN area */ + uint32_t sq_slots; /* sq length in slots */ + uint32_t sq_wqe_sz; /* sq wqe size */ + uint32_t sq_psn_sz; /* sq psn size */ + uint32_t sq_npsn; /* sq num psn entries */ + uint64_t rq_va; /* Peer-mem rq-va (not dma mapped) */ + uint32_t rq_len; /* rq length */ + uint32_t rq_slots; /* rq length in slots */ + uint32_t rq_wqe_sz; /* rq wqe size */ + uint64_t comp_mask; /* compatibility bit mask */ +}; + +void *bnxt_re_dv_umem_reg(struct ibv_context *ibvctx, + struct bnxt_re_dv_umem_reg_attr *in); +int bnxt_re_dv_umem_dereg(void *umem_handle); +struct ibv_cq *bnxt_re_dv_create_cq(struct ibv_context *ibvctx, + struct bnxt_re_dv_cq_init_attr *cq_attr); +int bnxt_re_dv_destroy_cq(struct ibv_cq *ibv_cq); +struct ibv_qp *bnxt_re_dv_create_qp(struct ibv_pd *pd, + struct bnxt_re_dv_qp_init_attr *qp_attr); +int bnxt_re_dv_destroy_qp(struct ibv_qp *ibvqp); +int bnxt_re_dv_modify_qp(struct ibv_qp *ibv_qp, struct ibv_qp_attr *attr, + int attr_mask, uint32_t type, uint32_t value); +int bnxt_re_dv_query_qp(void *qp_handle, struct ib_uverbs_qp_attr *attr); +int bnxt_re_dv_qp_mem_alloc(struct ibv_pd *ibvpd, + struct ibv_qp_init_attr *attr, + struct bnxt_re_dv_qp_mem_info *dv_qp_mem); +int bnxt_re_dv_qp_get_mem_info(struct ibv_pd *ibvpd, + struct ibv_qp_init_attr *attr, + struct bnxt_re_dv_qp_mem_info *qp_mem); +int bnxt_re_dv_get_cq_attr(struct ibv_context *ibvctx, uint32_t ncqe, + struct bnxt_re_dv_cq_attr *cq_attr); +void *bnxt_re_dv_cq_mem_alloc(struct ibv_context *ibvctx, int num_cqe, + struct bnxt_re_dv_cq_attr *cq_attr); +#ifdef __cplusplus +} +#endif +#endif /* __BNXT_RE_DV_H__ */ diff --git a/src/gda/bnxt/bnxt_re_hsi.h b/src/gda/bnxt/bnxt_re_hsi.h new file mode 100644 index 0000000000..710222ba9d --- /dev/null +++ b/src/gda/bnxt/bnxt_re_hsi.h @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2025, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Fast path definitions for bnxt_re + */ + +#ifndef __BNXT_RE_HSI_H__ +#define __BNXT_RE_HSI_H__ + +#ifdef __cplusplus +extern "C" { +#endif +#define true 1 +#define false 0 + +#define BNXT_RE_SLOT_SIZE_BB 16 +#define BNXT_RE_STATIC_WQE_SIZE_SLOTS 4 +#define BNXT_RE_STATIC_WQE_BB (BNXT_RE_STATIC_WQE_SIZE_SLOTS * BNXT_RE_SLOT_SIZE_BB) +#define BNXT_RE_STATIC_WQE_SHIFT 6 + +#define BNXT_RE_STATIC_RQE_SIZE_SLOTS 4 +#define BNXT_RE_STATIC_RQE_BB (BNXT_RE_STATIC_RQE_SIZE_SLOTS * BNXT_RE_SLOT_SIZE_BB) +#define BNXT_RE_STATIC_RQE_SHIFT 6 + +#define BNXT_RE_STATIC_CQE_SIZE_SLOTS 4 +#define BNXT_RE_STATIC_CQE_BB (BNXT_RE_STATIC_CQE_SIZE_SLOTS * BNXT_RE_SLOT_SIZE_BB) +#define BNXT_RE_STATIC_CQE_SHIFT 6 + +#define BNXT_RE_QUEUE_START_PHASE 0x01 + +enum bnxt_re_wr_opcode { + BNXT_RE_WR_OPCD_SEND = 0x00, + BNXT_RE_WR_OPCD_SEND_IMM = 0x01, + BNXT_RE_WR_OPCD_SEND_INVAL = 0x02, + BNXT_RE_WR_OPCD_RDMA_WRITE = 0x04, + BNXT_RE_WR_OPCD_RDMA_WRITE_IMM = 0x05, + BNXT_RE_WR_OPCD_RDMA_READ = 0x06, + BNXT_RE_WR_OPCD_ATOMIC_CS = 0x08, + BNXT_RE_WR_OPCD_ATOMIC_FA = 0x0B, + BNXT_RE_WR_OPCD_LOC_INVAL = 0x0C, + BNXT_RE_WR_OPCD_BIND = 0x0E, + BNXT_RE_WR_OPCD_FR_PPMR = 0x0F, + BNXT_RE_WR_OPCD_RECV = 0x80, + BNXT_RE_WR_OPCD_INVAL = 0xFF +}; + +enum bnxt_re_wr_flags { + BNXT_RE_WR_FLAGS_DBG_TRACE = 0x40, + BNXT_RE_WR_FLAGS_TS_EN = 0x20, + BNXT_RE_WR_FLAGS_INLINE = 0x10, + BNXT_RE_WR_FLAGS_SE = 0x08, + BNXT_RE_WR_FLAGS_UC_FENCE = 0x04, + BNXT_RE_WR_FLAGS_RD_FENCE = 0x02, + BNXT_RE_WR_FLAGS_SIGNALED = 0x01 +}; + +#define BNXT_RE_MEMW_TYPE_2 0x02 +#define BNXT_RE_MEMW_TYPE_1 0x00 +enum bnxt_re_wr_bind_acc { + BNXT_RE_WR_BIND_ACC_LWR = 0x01, + BNXT_RE_WR_BIND_ACC_RRD = 0x02, + BNXT_RE_WR_BIND_ACC_RWR = 0x04, + BNXT_RE_WR_BIND_ACC_RAT = 0x08, + BNXT_RE_WR_BIND_ACC_MWB = 0x10, + BNXT_RE_WR_BIND_ACC_ZBVA = 0x01, + BNXT_RE_WR_BIND_ACC_SHIFT = 0x10 +}; + +enum bnxt_re_wc_type { + BNXT_RE_WC_TYPE_SEND = 0x00, + BNXT_RE_WC_TYPE_RECV_RC = 0x01, + BNXT_RE_WC_TYPE_RECV_UD = 0x02, + BNXT_RE_WC_TYPE_RECV_RAW = 0x03, + BNXT_RE_WC_TYPE_NOOP = 0x0D, + BNXT_RE_WC_TYPE_TERM = 0x0E, + BNXT_RE_WC_TYPE_COFF = 0x0F +}; + +#define BNXT_RE_WC_OPCD_RECV 0x80 +enum bnxt_re_req_wc_status { + BNXT_RE_REQ_ST_OK = 0x00, + BNXT_RE_REQ_ST_BAD_RESP = 0x01, + BNXT_RE_REQ_ST_LOC_LEN = 0x02, + BNXT_RE_REQ_ST_LOC_QP_OP = 0x03, + BNXT_RE_REQ_ST_PROT = 0x04, + BNXT_RE_REQ_ST_MEM_OP = 0x05, + BNXT_RE_REQ_ST_REM_INVAL = 0x06, + BNXT_RE_REQ_ST_REM_ACC = 0x07, + BNXT_RE_REQ_ST_REM_OP = 0x08, + BNXT_RE_REQ_ST_RNR_NAK_XCED = 0x09, + BNXT_RE_REQ_ST_TRNSP_XCED = 0x0A, + BNXT_RE_REQ_ST_WR_FLUSH = 0x0B +}; + +enum bnxt_re_rsp_wc_status { + BNXT_RE_RSP_ST_OK = 0x00, + BNXT_RE_RSP_ST_LOC_ACC = 0x01, + BNXT_RE_RSP_ST_LOC_LEN = 0x02, + BNXT_RE_RSP_ST_LOC_PROT = 0x03, + BNXT_RE_RSP_ST_LOC_QP_OP = 0x04, + BNXT_RE_RSP_ST_MEM_OP = 0x05, + BNXT_RE_RSP_ST_REM_INVAL = 0x06, + BNXT_RE_RSP_ST_WR_FLUSH = 0x07, + BNXT_RE_RSP_ST_HW_FLUSH = 0x08 +}; + +enum bnxt_re_hdr_offset { + BNXT_RE_HDR_WT_MASK = 0xFF, + BNXT_RE_HDR_FLAGS_MASK = 0xFF, + BNXT_RE_HDR_FLAGS_SHIFT = 0x08, + BNXT_RE_HDR_WS_MASK = 0xFF, + BNXT_RE_HDR_WS_SHIFT = 0x10, + BNXT_RE_HDR_ZB_SHIFT = 0x16, + BNXT_RE_HDR_MW_SHIFT = 0x17, + BNXT_RE_HDR_ACC_SHIFT = 0x18, + BNXT_RE_HDR_IL_MASK = 0x0F, + BNXT_RE_HDR_IL_SHIFT = 0x18, +}; + +enum bnxt_re_db_que_type { + BNXT_RE_QUE_TYPE_SQ = 0x00, + BNXT_RE_QUE_TYPE_RQ = 0x01, + BNXT_RE_QUE_TYPE_SRQ = 0x02, + BNXT_RE_QUE_TYPE_SRQ_ARM = 0x03, + BNXT_RE_QUE_TYPE_CQ = 0x04, + BNXT_RE_QUE_TYPE_CQ_ARMSE = 0x05, + BNXT_RE_QUE_TYPE_CQ_ARMALL = 0x06, + BNXT_RE_QUE_TYPE_CQ_ARMENA = 0x07, + BNXT_RE_QUE_TYPE_SRQ_ARMENA = 0x08, + BNXT_RE_QUE_TYPE_CQ_CUT_ACK = 0x09, + BNXT_RE_PUSH_TYPE_START = 0x0C, + BNXT_RE_PUSH_TYPE_END = 0x0D, + BNXT_RE_QUE_TYPE_NULL = 0x0F +}; + +enum bnxt_re_db_mask { + BNXT_RE_DB_INDX_MASK = 0xFFFFFFUL, + BNXT_RE_DB_PILO_MASK = 0x0FFUL, + BNXT_RE_DB_PILO_SHIFT = 0x18, + BNXT_RE_DB_QID_MASK = 0xFFFFFUL, + BNXT_RE_DB_PIHI_MASK = 0xF00UL, + BNXT_RE_DB_PIHI_SHIFT = 0x0C, /* Because mask is 0xF00 */ + BNXT_RE_DB_TYP_MASK = 0x0FUL, + BNXT_RE_DB_TYP_SHIFT = 0x1C, + BNXT_RE_DB_VALID_SHIFT = 0x1A, + BNXT_RE_DB_EPOCH_SHIFT = 0x18, + BNXT_RE_DB_TOGGLE_SHIFT = 0x19, + +}; + +enum bnxt_re_psns_mask { + BNXT_RE_PSNS_SPSN_MASK = 0xFFFFFF, + BNXT_RE_PSNS_OPCD_MASK = 0xFF, + BNXT_RE_PSNS_OPCD_SHIFT = 0x18, + BNXT_RE_PSNS_NPSN_MASK = 0xFFFFFF, + BNXT_RE_PSNS_FLAGS_MASK = 0xFF, + BNXT_RE_PSNS_FLAGS_SHIFT = 0x18 +}; + +enum bnxt_re_msns_mask { + BNXT_RE_SQ_MSN_SEARCH_START_PSN_MASK = 0xFFFFFFUL, + BNXT_RE_SQ_MSN_SEARCH_START_PSN_SHIFT = 0, + BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_MASK = 0xFFFFFF000000ULL, + BNXT_RE_SQ_MSN_SEARCH_NEXT_PSN_SHIFT = 0x18, + BNXT_RE_SQ_MSN_SEARCH_START_IDX_MASK = 0xFFFF000000000000ULL, + BNXT_RE_SQ_MSN_SEARCH_START_IDX_SHIFT = 0x30 +}; + +enum bnxt_re_bcqe_mask { + BNXT_RE_BCQE_PH_MASK = 0x01, + BNXT_RE_BCQE_TYPE_MASK = 0x0F, + BNXT_RE_BCQE_TYPE_SHIFT = 0x01, + BNXT_RE_BCQE_RESIZE_TOG_MASK = 0x03, + BNXT_RE_BCQE_RESIZE_TOG_SHIFT = 0x05, + BNXT_RE_BCQE_STATUS_MASK = 0xFF, + BNXT_RE_BCQE_STATUS_SHIFT = 0x08, + BNXT_RE_BCQE_FLAGS_MASK = 0xFFFFU, + BNXT_RE_BCQE_FLAGS_SHIFT = 0x10, + + /* wr_id for V1/V2 */ + BNXT_RE_BCQE_RWRID_MASK = 0xFFFFFU, + + /* higher 16b of source QP for V1/V2 */ + BNXT_RE_BCQE_SRCQP_MASK = 0xFF, + BNXT_RE_BCQE_SRCQP_SHIFT = 0x18 +}; + +enum bnxt_re_rc_flags_mask { + BNXT_RE_RC_FLAGS_SRQ_RQ_MASK = 0x01, + BNXT_RE_RC_FLAGS_IMM_MASK = 0x02, + BNXT_RE_RC_FLAGS_IMM_SHIFT = 0x01, + BNXT_RE_RC_FLAGS_INV_MASK = 0x04, + BNXT_RE_RC_FLAGS_INV_SHIFT = 0x02, + BNXT_RE_RC_FLAGS_RDMA_MASK = 0x08, + BNXT_RE_RC_FLAGS_RDMA_SHIFT = 0x03 +}; + +enum bnxt_re_ud_flags_mask { + BNXT_RE_UD_FLAGS_SRQ_RQ_MASK = 0x01, + BNXT_RE_UD_FLAGS_SRQ_RQ_SFT = 0x00, + BNXT_RE_UD_FLAGS_IMM_MASK = 0x02, + BNXT_RE_UD_FLAGS_IMM_SFT = 0x01, + BNXT_RE_UD_FLAGS_IP_VER_MASK = 0x30, + BNXT_RE_UD_FLAGS_IP_VER_SFT = 0x4, + + /* the following has been removed in V3 */ + BNXT_RE_UD_FLAGS_META_MASK = 0x3C0, + BNXT_RE_UD_FLAGS_META_SFT = 0x6, + BNXT_RE_UD_FLAGS_EXT_META_MASK = 0xC00, + BNXT_RE_UD_FLAGS_EXT_META_SFT = 0x10, +}; + +enum bnxt_re_ud_cqe_mask { + BNXT_RE_UD_CQE_MAC_MASK = 0xFFFFFFFFFFFFULL, + BNXT_RE_UD_CQE_SRCQPLO_MASK = 0xFFFF, + BNXT_RE_UD_CQE_SRCQPLO_SHIFT = 0x30, + BNXT_RE_UD_CQE_LEN_MASK = 0x3FFFU +}; + +enum bnxt_re_que_flags_mask { + BNXT_RE_FLAG_EPOCH_TAIL_SHIFT = 0x0UL, + BNXT_RE_FLAG_EPOCH_HEAD_SHIFT = 0x1UL, + BNXT_RE_FLAG_EPOCH_TAIL_MASK = 0x1UL, + BNXT_RE_FLAG_EPOCH_HEAD_MASK = 0x2UL, +}; + +enum bnxt_re_db_epoch_flag_shift { + BNXT_RE_DB_EPOCH_TAIL_SHIFT = BNXT_RE_DB_EPOCH_SHIFT, + BNXT_RE_DB_EPOCH_HEAD_SHIFT = (BNXT_RE_DB_EPOCH_SHIFT - 1) +}; + +enum bnxt_re_ppp_st_en_mask { + BNXT_RE_PPP_ENABLED_MASK = 0x1UL, + BNXT_RE_PPP_STATE_MASK = 0x2UL, +}; + +enum bnxt_re_ppp_st_shift { + BNXT_RE_PPP_ST_SHIFT = 0x1UL +}; + +struct bnxt_re_db_hdr { + __u64 typ_qid_indx; /* typ: 4, qid:20 (qid:12 on V3), indx:24 */ +}; + +#define BNXT_RE_CHIP_ID0_CHIP_NUM_SFT 0x00 +#define BNXT_RE_CHIP_ID0_CHIP_REV_SFT 0x10 +#define BNXT_RE_CHIP_ID0_CHIP_MET_SFT 0x18 + +#define BNXT_RE_STATIC_WQE_MAX_SGE 0x06 +#define BNXT_RE_WQE_MODES_WQE_MODE_MASK 0x01 + +struct bnxt_re_bcqe { + __u32 flg_st_typ_ph; + __u32 qphi_rwrid; /* This field becomes opaque in V3 */ +} __attribute__((packed)); + +struct bnxt_re_req_cqe { + __u64 qp_handle; + __u32 con_indx; /* 16 bits valid. */ + __u32 rsvd1; + __u64 rsvd2; +} __attribute__((packed)); + +struct bnxt_re_rc_cqe { + __u32 length; + __u32 imm_key; + __u64 qp_handle; + __u64 mr_handle; +} __attribute__((packed)); + +struct bnxt_re_ud_cqe { + __u32 length; /* 14 bits */ + __u32 immd; + __u64 qp_handle; + __u64 qplo_mac; /* 16:48*/ +} __attribute__((packed)); + +struct bnxt_re_term_cqe { + __u64 qp_handle; + __u32 rq_sq_cidx; + __u32 rsvd; + __u64 rsvd1; +} __attribute__((packed)); + +struct bnxt_re_cutoff_cqe { + __u64 rsvd1; + __u64 rsvd2; + __u64 rsvd3; + __u8 cqe_type_toggle; + __u8 status; + __u16 rsvd4; + __u32 rsvd5; +} __attribute__((packed)); + +union lower_shdr { + __u64 qkey_len; + __u64 lkey_plkey; + __u64 rva; +}; + +struct bnxt_re_bsqe { + __u32 rsv_ws_fl_wt; + __u32 key_immd; + union lower_shdr lhdr; +} __attribute__((packed)); + +struct bnxt_re_psns_ext { + __u32 opc_spsn; + __u32 flg_npsn; + __u16 st_slot_idx; + __u16 rsvd0; + __u32 rsvd1; +} __attribute__((packed)); + +/* sq_msn_search (size:64b/8B) */ +struct bnxt_re_msns { + __u64 start_idx_next_psn_start_psn; +} __attribute__((packed)); + +struct bnxt_re_psns { + __u32 opc_spsn; + __u32 flg_npsn; +} __attribute__((packed)); + +struct bnxt_re_sge { + __u64 pa; + __u32 lkey; + __u32 length; +} __attribute__((packed)); + +struct bnxt_re_send { + __u32 dst_qp; + __u32 avid; + __u64 rsvd; +} __attribute__((packed)); + +struct bnxt_re_raw { + __u32 cfa_meta; + __u32 ts; /* timestamp for V3 */ + __u64 rsvd3; /* timestamp for V1/V2 */ +} __attribute__((packed)); + +struct bnxt_re_rdma { + __u64 rva; + __u32 rkey; + __u32 ts; /* timestamp for V3 */ +} __attribute__((packed)); + +struct bnxt_re_atomic { + __u64 swp_dt; + __u64 cmp_dt; +} __attribute__((packed)); + +struct bnxt_re_inval { + __u64 rsvd[2]; +} __attribute__((packed)); + +struct bnxt_re_bind { + __u64 va; + __u64 len; /* only 40 bits are valid for V1/V2. Full 64-bit for V3 */ +} __attribute__((packed)); + +struct bnxt_re_brqe { + __u32 rsv_ws_fl_wt; + __u32 opaque; /* opaque is V3 only */ + __u32 wrid; /* wrid is V1/V2 only */ + __u32 rsvd1; +} __attribute__((packed)); + +/* V1/V2 only. For V3, sge immediately follows struct bnxt_re_brqe */ +struct bnxt_re_rqe { + __u64 rsvd[2]; +} __attribute__((packed)); + +/* SRQ */ +struct bnxt_re_srqe { + __u64 rsvd[2]; +} __attribute__((packed)); + +struct bnxt_re_push_wqe { + __u64 addr[32]; +} __attribute__((packed)); +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/gda/bnxt/provider_gda_bnxt.hpp b/src/gda/bnxt/provider_gda_bnxt.hpp index 7b1853a78a..dd83fa58c5 100644 --- a/src/gda/bnxt/provider_gda_bnxt.hpp +++ b/src/gda/bnxt/provider_gda_bnxt.hpp @@ -26,17 +26,10 @@ #define LIBRARY_SRC_GDA_BNXT_GDA_PROVIDER_HPP_ extern "C" { -#include -#include +#include "gda/bnxt/bnxt_re_dv.h" +#include "gda/bnxt/bnxt_re_hsi.h" } -#define GDA_DEFAULT_GID 3 -#define GDA_MAX_ATOMIC 1 -#define GDA_OP_RDMA_WRITE BNXT_RE_WR_OPCD_RDMA_WRITE -#define GDA_OP_RDMA_READ BNXT_RE_WR_OPCD_RDMA_READ -#define GDA_OP_ATOMIC_FA BNXT_RE_WR_OPCD_ATOMIC_FA -#define GDA_OP_ATOMIC_CS BNXT_RE_WR_OPCD_ATOMIC_CS - #define bnxt_re_get_cqe_sz() (sizeof(struct bnxt_re_req_cqe) + \ sizeof(struct bnxt_re_bcqe)) diff --git a/src/gda/bnxt/queue_pair_bnxt.cpp b/src/gda/bnxt/queue_pair_bnxt.cpp index e6319da754..44b8de616a 100644 --- a/src/gda/bnxt/queue_pair_bnxt.cpp +++ b/src/gda/bnxt/queue_pair_bnxt.cpp @@ -267,7 +267,7 @@ __device__ void QueuePair::post_wqe_rma(int pe, int32_t length, uintptr_t *laddr uint32_t rma_slots = 3; // (Three slots: hdr, rdma, sge) inline_msg = length <= inline_threshold && - opcode == GDA_OP_RDMA_WRITE; + opcode == gda_op_rdma_write; hdr_ptr = (struct bnxt_re_bsqe*) bnxt_re_get_hwqe(&sq, 0); rdma_ptr = (struct bnxt_re_rdma*) bnxt_re_get_hwqe(&sq, 1); diff --git a/src/gda/context_gda_tmpl_device.hpp b/src/gda/context_gda_tmpl_device.hpp index 12d6d2274d..5a2d91175b 100644 --- a/src/gda/context_gda_tmpl_device.hpp +++ b/src/gda/context_gda_tmpl_device.hpp @@ -99,7 +99,7 @@ __device__ void GDAContext::amo_add(void *dst, T value, int pe) { uint8_t lane = __ffsll((unsigned long long)turns) - 1; int pe_turn = __shfl(pe, lane); if (pe_turn == pe) { - qps[pe].atomic_nofetch(base_heap[pe] + L_offset, value, 0, pe, GDA_OP_ATOMIC_FA); + qps[pe].atomic_nofetch(base_heap[pe] + L_offset, value, 0, pe); need_turn = false; } turns = __ballot(need_turn); @@ -113,7 +113,7 @@ __device__ void GDAContext::amo_set(void *dst, T value, int pe) { T ret_val; T cond = 0; for (int i = 0; i < WF_SIZE; i++) { //TODO: this looks wrong - while ((ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, cond, pe, GDA_OP_ATOMIC_CS))) { + while ((ret_val = qps[pe].atomic_cas(base_heap[pe] + L_offset, value, cond, pe))) { if (ret_val == cond) { break; } cond = ret_val; } @@ -171,7 +171,7 @@ __device__ void GDAContext::amo_cas(void *dst, T value, T cond, int pe) { if constexpr (sizeof(T) != 8) { printf("rocshmem::gda:amo_cas not implemented for non-64bit types.\n"); abort(); }//TODO:support for non-uint64t uint64_t L_offset = reinterpret_cast(dst) - base_heap[my_pe]; for (int i = 0; i < WF_SIZE; i++) { //TODO: this looks wrong - qps[pe].atomic_nofetch(base_heap[pe] + L_offset, value, cond, pe, GDA_OP_ATOMIC_CS); + qps[pe].atomic_cas_nofetch(base_heap[pe] + L_offset, value, cond, pe); } } @@ -186,7 +186,7 @@ __device__ T GDAContext::amo_fetch_add(void *dst, T value, int pe) { uint8_t lane = __ffsll((unsigned long long)turns) - 1; int pe_turn = __shfl(pe, lane); if (pe_turn == pe) { - ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, 0, pe, GDA_OP_ATOMIC_FA); + ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, 0, pe); need_turn = false; } turns = __ballot(need_turn); @@ -200,7 +200,7 @@ __device__ T GDAContext::amo_fetch_cas(void *dst, T value, T cond, int pe) { uint64_t L_offset = reinterpret_cast(dst) - base_heap[my_pe]; T ret_val; for (int i = 0; i < WF_SIZE; i++) { - ret_val = qps[pe].atomic_fetch(base_heap[pe] + L_offset, value, cond, pe, GDA_OP_ATOMIC_CS); + ret_val = qps[pe].atomic_cas(base_heap[pe] + L_offset, value, cond, pe); } return ret_val; } diff --git a/src/gda/debug_gda.hpp b/src/gda/debug_gda.hpp index 214f6d0d5c..a498ef6882 100644 --- a/src/gda/debug_gda.hpp +++ b/src/gda/debug_gda.hpp @@ -30,11 +30,8 @@ static void dump_ibv_device(struct ibv_device *x); static void dump_ibv_pd(struct ibv_pd *x); static void dump_ibv_port_attr(struct ibv_port_attr *x); static void dump_ibv_qp(struct ibv_qp *qp, int conn_num); - -#if defined(GDA_MLX5) static void dump_mlx5dv_qp(struct mlx5dv_qp *qp_dv, int conn_num); static void dump_mlx5dv_cq(struct mlx5dv_cq *cq_dv, int conn_num); -#endif static void dump_ibv_context(struct ibv_context* x) { /* @@ -194,7 +191,6 @@ void dump_ibv_qp(struct ibv_qp *qp, int conn_num) { DPRINTF("=========== QP_DUMP_END CONNECTION#%d ========\n", conn_num); } -#if defined(GDA_MLX5) void dump_mlx5dv_qp(struct mlx5dv_qp *qp_dv, int conn_num) { DPRINTF("\n"); DPRINTF("===============================================\n"); @@ -235,6 +231,5 @@ void dump_mlx5dv_cq(struct mlx5dv_cq *cq_dv, int conn_num) { DPRINTF(" (uint64_t) comp_mask = 0x%lx\n", cq_dv->comp_mask); DPRINTF("================== CQ_DUMP_END ================\n"); } -#endif // GDA_MLX5 #endif /* LIBRARY_SRC_GDA_DEBUG_GDA_HPP_ */ diff --git a/src/gda/ionic/provider_gda_ionic.hpp b/src/gda/ionic/provider_gda_ionic.hpp new file mode 100644 index 0000000000..73397a9464 --- /dev/null +++ b/src/gda/ionic/provider_gda_ionic.hpp @@ -0,0 +1,39 @@ +/****************************************************************************** + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_ +#define LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_ + +#ifdef GDA_IONIC +extern "C" { +#include +#include +} +#endif + +#define SPIN_LOCK_INVALID 0xdead +#define SPIN_LOCK_UNLOCKED 0x1234 +#define SPIN_LOCK_LOCKED 0xabcd + +#endif //LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_ diff --git a/src/gda/mlx5/mlx5dv.h b/src/gda/mlx5/mlx5dv.h new file mode 100644 index 0000000000..11d646c304 --- /dev/null +++ b/src/gda/mlx5/mlx5dv.h @@ -0,0 +1,2229 @@ +/* + * Copyright (c) 2017 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX5DV_H_ +#define _MLX5DV_H_ + +#include +#include +#include /* For the __be64 type */ +#include +#include +#if defined(__SSE3__) +#include +#include +#include +#endif /* defined(__SSE3__) */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Always inline the functions */ +#ifdef __GNUC__ +#define MLX5DV_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +#define MLX5DV_ALWAYS_INLINE inline +#endif + + +#define MLX5DV_RES_TYPE_QP ((uint64_t)RDMA_DRIVER_MLX5 << 32 | 1) +#define MLX5DV_RES_TYPE_RWQ ((uint64_t)RDMA_DRIVER_MLX5 << 32 | 2) +#define MLX5DV_RES_TYPE_DBR ((uint64_t)RDMA_DRIVER_MLX5 << 32 | 3) +#define MLX5DV_RES_TYPE_SRQ ((uint64_t)RDMA_DRIVER_MLX5 << 32 | 4) +#define MLX5DV_RES_TYPE_CQ ((uint64_t)RDMA_DRIVER_MLX5 << 32 | 5) + +enum { + MLX5_RCV_DBR = 0, + MLX5_SND_DBR = 1, +}; + +enum mlx5dv_context_comp_mask { + MLX5DV_CONTEXT_MASK_CQE_COMPRESION = 1 << 0, + MLX5DV_CONTEXT_MASK_SWP = 1 << 1, + MLX5DV_CONTEXT_MASK_STRIDING_RQ = 1 << 2, + MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS = 1 << 3, + MLX5DV_CONTEXT_MASK_DYN_BFREGS = 1 << 4, + MLX5DV_CONTEXT_MASK_CLOCK_INFO_UPDATE = 1 << 5, + MLX5DV_CONTEXT_MASK_FLOW_ACTION_FLAGS = 1 << 6, + MLX5DV_CONTEXT_MASK_DC_ODP_CAPS = 1 << 7, + MLX5DV_CONTEXT_MASK_HCA_CORE_CLOCK = 1 << 8, + MLX5DV_CONTEXT_MASK_NUM_LAG_PORTS = 1 << 9, + MLX5DV_CONTEXT_MASK_SIGNATURE_OFFLOAD = 1 << 10, + MLX5DV_CONTEXT_MASK_DCI_STREAMS = 1 << 11, + MLX5DV_CONTEXT_MASK_WR_MEMCPY_LENGTH = 1 << 12, + MLX5DV_CONTEXT_MASK_CRYPTO_OFFLOAD = 1 << 13, +}; + +struct mlx5dv_cqe_comp_caps { + uint32_t max_num; + uint32_t supported_format; /* enum mlx5dv_cqe_comp_res_format */ +}; + +struct mlx5dv_sw_parsing_caps { + uint32_t sw_parsing_offloads; /* Use enum mlx5dv_sw_parsing_offloads */ + uint32_t supported_qpts; +}; + +struct mlx5dv_striding_rq_caps { + uint32_t min_single_stride_log_num_of_bytes; + uint32_t max_single_stride_log_num_of_bytes; + uint32_t min_single_wqe_log_num_of_strides; + uint32_t max_single_wqe_log_num_of_strides; + uint32_t supported_qpts; +}; + +struct mlx5dv_dci_streams_caps { + uint8_t max_log_num_concurent; + uint8_t max_log_num_errored; +}; + +enum mlx5dv_tunnel_offloads { + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN = 1 << 0, + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE = 1 << 1, + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE = 1 << 2, + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE = 1 << 3, + MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP = 1 << 4, +}; + +enum mlx5dv_flow_action_cap_flags { + MLX5DV_FLOW_ACTION_FLAGS_ESP_AES_GCM = 1 << 0, + MLX5DV_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA = 1 << 1, + MLX5DV_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING = 1 << 2, + MLX5DV_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD = 1 << 3, + MLX5DV_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN = 1 << 4, +}; + +enum mlx5dv_sig_type { + MLX5DV_SIG_TYPE_T10DIF, + MLX5DV_SIG_TYPE_CRC, +}; + +enum mlx5dv_sig_prot_caps { + MLX5DV_SIG_PROT_CAP_T10DIF = 1 << MLX5DV_SIG_TYPE_T10DIF, + MLX5DV_SIG_PROT_CAP_CRC = 1 << MLX5DV_SIG_TYPE_CRC, +}; + +enum mlx5dv_sig_t10dif_bg_type { + MLX5DV_SIG_T10DIF_CRC, + MLX5DV_SIG_T10DIF_CSUM, +}; + +enum mlx5dv_sig_t10dif_bg_caps { + MLX5DV_SIG_T10DIF_BG_CAP_CRC = 1 << MLX5DV_SIG_T10DIF_CRC, + MLX5DV_SIG_T10DIF_BG_CAP_CSUM = 1 << MLX5DV_SIG_T10DIF_CSUM, +}; + +enum mlx5dv_sig_crc_type { + MLX5DV_SIG_CRC_TYPE_CRC32, + MLX5DV_SIG_CRC_TYPE_CRC32C, + MLX5DV_SIG_CRC_TYPE_CRC64_XP10, +}; + +enum mlx5dv_sig_crc_type_caps { + MLX5DV_SIG_CRC_TYPE_CAP_CRC32 = 1 << MLX5DV_SIG_CRC_TYPE_CRC32, + MLX5DV_SIG_CRC_TYPE_CAP_CRC32C = 1 << MLX5DV_SIG_CRC_TYPE_CRC32C, + MLX5DV_SIG_CRC_TYPE_CAP_CRC64_XP10 = 1 << MLX5DV_SIG_CRC_TYPE_CRC64_XP10, +}; + +enum mlx5dv_block_size { + MLX5DV_BLOCK_SIZE_512, + MLX5DV_BLOCK_SIZE_520, + MLX5DV_BLOCK_SIZE_4048, + MLX5DV_BLOCK_SIZE_4096, + MLX5DV_BLOCK_SIZE_4160, +}; + +enum mlx5dv_block_size_caps { + MLX5DV_BLOCK_SIZE_CAP_512 = 1 << MLX5DV_BLOCK_SIZE_512, + MLX5DV_BLOCK_SIZE_CAP_520 = 1 << MLX5DV_BLOCK_SIZE_520, + MLX5DV_BLOCK_SIZE_CAP_4048 = 1 << MLX5DV_BLOCK_SIZE_4048, + MLX5DV_BLOCK_SIZE_CAP_4096 = 1 << MLX5DV_BLOCK_SIZE_4096, + MLX5DV_BLOCK_SIZE_CAP_4160 = 1 << MLX5DV_BLOCK_SIZE_4160, +}; + +struct mlx5dv_sig_caps { + uint64_t block_size; /* use enum mlx5dv_block_size_caps */ + uint32_t block_prot; /* use enum mlx5dv_sig_prot_caps */ + uint16_t t10dif_bg; /* use enum mlx5dv_sig_t10dif_bg_caps */ + uint16_t crc_type; /* use enum mlx5dv_sig_crc_type_caps */ +}; + +enum mlx5dv_crypto_engines_caps { + MLX5DV_CRYPTO_ENGINES_CAP_AES_XTS = 1 << 0, + MLX5DV_CRYPTO_ENGINES_CAP_AES_XTS_SINGLE_BLOCK = 1 << 1, + MLX5DV_CRYPTO_ENGINES_CAP_AES_XTS_MULTI_BLOCK = 1 << 2, +}; + +enum mlx5dv_crypto_wrapped_import_method_caps { + MLX5DV_CRYPTO_WRAPPED_IMPORT_METHOD_CAP_AES_XTS = 1 << 0, +}; + +enum mlx5dv_crypto_caps_flags { + MLX5DV_CRYPTO_CAPS_CRYPTO = 1 << 0, + MLX5DV_CRYPTO_CAPS_WRAPPED_CRYPTO_OPERATIONAL = 1 << 1, + MLX5DV_CRYPTO_CAPS_WRAPPED_CRYPTO_GOING_TO_COMMISSIONING = 1 << 2, +}; + +struct mlx5dv_crypto_caps { + /* + * if failed_selftests != 0 it means there are some self tests errors + * that may render specific crypto engines unusable. Exact code meaning + * should be consulted with NVIDIA. + */ + uint16_t failed_selftests; + uint8_t crypto_engines; /* use enum mlx5dv_crypto_engines_caps */ + uint8_t wrapped_import_method; /* use enum mlx5dv_crypto_wrapped_import_method_caps */ + uint8_t log_max_num_deks; + uint32_t flags; /* use enum mlx5dv_crypto_caps_flags */ +}; + +struct mlx5dv_devx_port { + uint64_t comp_mask; + uint16_t vport_num; + uint16_t vport_vhca_id; + uint16_t esw_owner_vhca_id; + uint64_t icm_addr_rx; + uint64_t icm_addr_tx; + struct mlx5dv_reg reg_c_0; +}; + +/* + * Direct verbs device-specific attributes + */ +struct mlx5dv_context { + uint8_t version; + uint64_t flags; + uint64_t comp_mask; + struct mlx5dv_cqe_comp_caps cqe_comp_caps; + struct mlx5dv_sw_parsing_caps sw_parsing_caps; + struct mlx5dv_striding_rq_caps striding_rq_caps; + uint32_t tunnel_offloads_caps; + uint32_t max_dynamic_bfregs; + uint64_t max_clock_info_update_nsec; + uint32_t flow_action_flags; /* use enum mlx5dv_flow_action_cap_flags */ + uint32_t dc_odp_caps; /* use enum ibv_odp_transport_cap_bits */ + void *hca_core_clock; + uint8_t num_lag_ports; + struct mlx5dv_sig_caps sig_caps; + struct mlx5dv_dci_streams_caps dci_streams_caps; + size_t max_wr_memcpy_length; + struct mlx5dv_crypto_caps crypto_caps; +}; + +enum mlx5dv_context_flags { + /* + * This flag indicates if CQE version 0 or 1 is needed. + */ + MLX5DV_CONTEXT_FLAGS_CQE_V1 = (1 << 0), + MLX5DV_CONTEXT_FLAGS_OBSOLETE = (1 << 1), /* Obsoleted, don't use */ + MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED = (1 << 2), + MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW = (1 << 3), + MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP = (1 << 4), /* Support CQE 128B compression */ + MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD = (1 << 5), /* Support CQE 128B padding */ + MLX5DV_CONTEXT_FLAGS_PACKET_BASED_CREDIT_MODE = (1 << 6), + MLX5DV_CONTEXT_FLAGS_REAL_TIME_TS = (1 << 7), +}; + +enum mlx5dv_cq_init_attr_mask { + MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE = 1 << 0, + MLX5DV_CQ_INIT_ATTR_MASK_FLAGS = 1 << 1, + MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE = 1 << 2, +}; + +enum mlx5dv_cq_init_attr_flags { + MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD = 1 << 0, + MLX5DV_CQ_INIT_ATTR_FLAGS_RESERVED = 1 << 1, +}; + +struct mlx5dv_cq_init_attr { + uint64_t comp_mask; /* Use enum mlx5dv_cq_init_attr_mask */ + uint8_t cqe_comp_res_format; /* Use enum mlx5dv_cqe_comp_res_format */ + uint32_t flags; /* Use enum mlx5dv_cq_init_attr_flags */ + uint16_t cqe_size; /* when MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE set */ +}; + +struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr, + struct mlx5dv_cq_init_attr *mlx5_cq_attr); + +enum mlx5dv_qp_create_flags { + MLX5DV_QP_CREATE_TUNNEL_OFFLOADS = 1 << 0, + MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC = 1 << 1, + MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_MC = 1 << 2, + MLX5DV_QP_CREATE_DISABLE_SCATTER_TO_CQE = 1 << 3, + MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE = 1 << 4, + MLX5DV_QP_CREATE_PACKET_BASED_CREDIT_MODE = 1 << 5, + MLX5DV_QP_CREATE_SIG_PIPELINING = 1 << 6, +}; + +enum mlx5dv_mkey_init_attr_flags { + MLX5DV_MKEY_INIT_ATTR_FLAGS_INDIRECT = 1 << 0, + MLX5DV_MKEY_INIT_ATTR_FLAGS_BLOCK_SIGNATURE = 1 << 1, + MLX5DV_MKEY_INIT_ATTR_FLAGS_CRYPTO = 1 << 2, + MLX5DV_MKEY_INIT_ATTR_FLAGS_UPDATE_TAG = 1 << 3, + MLX5DV_MKEY_INIT_ATTR_FLAGS_REMOTE_INVALIDATE = 1 << 4, +}; + +struct mlx5dv_mkey_init_attr { + struct ibv_pd *pd; + uint32_t create_flags; /* Use enum mlx5dv_mkey_init_attr_flags */ + uint16_t max_entries; /* Requested max number of pointed entries by this indirect mkey */ +}; + +struct mlx5dv_mkey { + uint32_t lkey; + uint32_t rkey; +}; + +struct mlx5dv_mkey *mlx5dv_create_mkey(struct mlx5dv_mkey_init_attr *mkey_init_attr); +int mlx5dv_destroy_mkey(struct mlx5dv_mkey *mkey); + +enum mlx5dv_qp_init_attr_mask { + MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, + MLX5DV_QP_INIT_ATTR_MASK_DC = 1 << 1, + MLX5DV_QP_INIT_ATTR_MASK_SEND_OPS_FLAGS = 1 << 2, + MLX5DV_QP_INIT_ATTR_MASK_DCI_STREAMS = 1 << 3, +}; + +enum mlx5dv_dc_type { + MLX5DV_DCTYPE_DCT = 1, + MLX5DV_DCTYPE_DCI, +}; + +struct mlx5dv_dci_streams { + uint8_t log_num_concurent; + uint8_t log_num_errored; +}; + +struct mlx5dv_dc_init_attr { + enum mlx5dv_dc_type dc_type; + union { + uint64_t dct_access_key; + struct mlx5dv_dci_streams dci_streams; + }; +}; + +enum mlx5dv_qp_create_send_ops_flags { + MLX5DV_QP_EX_WITH_MR_INTERLEAVED = 1 << 0, + MLX5DV_QP_EX_WITH_MR_LIST = 1 << 1, + MLX5DV_QP_EX_WITH_MKEY_CONFIGURE = 1 << 2, + MLX5DV_QP_EX_WITH_RAW_WQE = 1 << 3, + MLX5DV_QP_EX_WITH_MEMCPY = 1 << 4, +}; + +struct mlx5dv_qp_init_attr { + uint64_t comp_mask; /* Use enum mlx5dv_qp_init_attr_mask */ + uint32_t create_flags; /* Use enum mlx5dv_qp_create_flags */ + struct mlx5dv_dc_init_attr dc_init_attr; + uint64_t send_ops_flags; /* Use enum mlx5dv_qp_create_send_ops_flags */ +}; + +struct ibv_qp *mlx5dv_create_qp(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_attr, + struct mlx5dv_qp_init_attr *mlx5_qp_attr); + +struct mlx5dv_mr_interleaved { + uint64_t addr; + uint32_t bytes_count; + uint32_t bytes_skip; + uint32_t lkey; +}; + +enum mlx5dv_sig_t10dif_flags { + MLX5DV_SIG_T10DIF_FLAG_REF_REMAP = 1 << 0, + MLX5DV_SIG_T10DIF_FLAG_APP_ESCAPE = 1 << 1, + MLX5DV_SIG_T10DIF_FLAG_APP_REF_ESCAPE = 1 << 2, +}; + +struct mlx5dv_sig_t10dif { + enum mlx5dv_sig_t10dif_bg_type bg_type; + uint16_t bg; + uint16_t app_tag; + uint32_t ref_tag; + uint16_t flags; /* Use enum mlx5dv_sig_t10dif_flags */ +}; + +struct mlx5dv_sig_crc { + enum mlx5dv_sig_crc_type type; + uint64_t seed; +}; + +struct mlx5dv_sig_block_domain { + enum mlx5dv_sig_type sig_type; + union { + const struct mlx5dv_sig_t10dif *dif; + const struct mlx5dv_sig_crc *crc; + } sig; + enum mlx5dv_block_size block_size; + uint64_t comp_mask; +}; + +enum mlx5dv_sig_mask { + MLX5DV_SIG_MASK_T10DIF_GUARD = 0xc0, + MLX5DV_SIG_MASK_T10DIF_APPTAG = 0x30, + MLX5DV_SIG_MASK_T10DIF_REFTAG = 0x0f, + MLX5DV_SIG_MASK_CRC32 = 0xf0, + MLX5DV_SIG_MASK_CRC32C = MLX5DV_SIG_MASK_CRC32, + MLX5DV_SIG_MASK_CRC64_XP10 = 0xff, +}; + +enum mlx5dv_sig_block_attr_flags { + MLX5DV_SIG_BLOCK_ATTR_FLAG_COPY_MASK = 1 << 0, +}; + +struct mlx5dv_sig_block_attr { + const struct mlx5dv_sig_block_domain *mem; + const struct mlx5dv_sig_block_domain *wire; + uint32_t flags; /* Use enum mlx5dv_sig_block_attr_flags */ + uint8_t check_mask; + uint8_t copy_mask; + uint64_t comp_mask; +}; + +enum mlx5dv_crypto_standard { + MLX5DV_CRYPTO_STANDARD_AES_XTS, +}; + +enum mlx5dv_signature_crypto_order { + MLX5DV_SIGNATURE_CRYPTO_ORDER_SIGNATURE_AFTER_CRYPTO_ON_TX, + MLX5DV_SIGNATURE_CRYPTO_ORDER_SIGNATURE_BEFORE_CRYPTO_ON_TX, +}; + +struct mlx5dv_crypto_attr { + enum mlx5dv_crypto_standard crypto_standard; + bool encrypt_on_tx; + enum mlx5dv_signature_crypto_order signature_crypto_order; + enum mlx5dv_block_size data_unit_size; + char initial_tweak[16]; + struct mlx5dv_dek *dek; + char keytag[8]; + uint64_t comp_mask; +}; + +enum mlx5dv_mkey_conf_flags { + MLX5DV_MKEY_CONF_FLAG_RESET_SIG_ATTR = 1 << 0, +}; + +struct mlx5dv_mkey_conf_attr { + uint32_t conf_flags; /* Use enum mlx5dv_mkey_conf_flags */ + uint64_t comp_mask; +}; + +enum mlx5dv_wc_opcode { + MLX5DV_WC_UMR = IBV_WC_DRIVER1, + MLX5DV_WC_RAW_WQE = IBV_WC_DRIVER2, + MLX5DV_WC_MEMCPY = IBV_WC_DRIVER3, +}; + +struct mlx5dv_qp_ex { + uint64_t comp_mask; + /* + * Available just for the MLX5 DC QP type with send opcodes of type: + * rdma, atomic and send. + */ + void (*wr_set_dc_addr)(struct mlx5dv_qp_ex *mqp, struct ibv_ah *ah, + uint32_t remote_dctn, uint64_t remote_dc_key); + void (*wr_mr_interleaved)(struct mlx5dv_qp_ex *mqp, + struct mlx5dv_mkey *mkey, + uint32_t access_flags, /* use enum ibv_access_flags */ + uint32_t repeat_count, + uint16_t num_interleaved, + struct mlx5dv_mr_interleaved *data); + void (*wr_mr_list)(struct mlx5dv_qp_ex *mqp, + struct mlx5dv_mkey *mkey, + uint32_t access_flags, /* use enum ibv_access_flags */ + uint16_t num_sges, + struct ibv_sge *sge); + void (*wr_mkey_configure)(struct mlx5dv_qp_ex *mqp, + struct mlx5dv_mkey *mkey, + uint8_t num_setters, + struct mlx5dv_mkey_conf_attr *attr); + void (*wr_set_mkey_access_flags)(struct mlx5dv_qp_ex *mqp, + uint32_t access_flags); + void (*wr_set_mkey_layout_list)(struct mlx5dv_qp_ex *mqp, + uint16_t num_sges, + const struct ibv_sge *sge); + void (*wr_set_mkey_layout_interleaved)( + struct mlx5dv_qp_ex *mqp, + uint32_t repeat_count, + uint16_t num_interleaved, + const struct mlx5dv_mr_interleaved *data); + void (*wr_set_mkey_sig_block)(struct mlx5dv_qp_ex *mqp, + const struct mlx5dv_sig_block_attr *attr); + void (*wr_raw_wqe)(struct mlx5dv_qp_ex *mqp, const void *wqe); + void (*wr_set_dc_addr_stream)(struct mlx5dv_qp_ex *mqp, + struct ibv_ah *ah, + uint32_t remote_dctn, + uint64_t remote_dc_key, + uint16_t stream_id); + void (*wr_memcpy)(struct mlx5dv_qp_ex *mqp, + uint32_t dest_lkey, uint64_t dest_addr, + uint32_t src_lkey, uint64_t src_addr, + size_t length); + void (*wr_set_mkey_crypto)(struct mlx5dv_qp_ex *mqp, + const struct mlx5dv_crypto_attr *attr); +}; + +struct mlx5dv_qp_ex *mlx5dv_qp_ex_from_ibv_qp_ex(struct ibv_qp_ex *qp); + +static inline void mlx5dv_wr_set_dc_addr(struct mlx5dv_qp_ex *mqp, + struct ibv_ah *ah, + uint32_t remote_dctn, + uint64_t remote_dc_key) +{ + mqp->wr_set_dc_addr(mqp, ah, remote_dctn, remote_dc_key); +} + +static inline void mlx5dv_wr_set_dc_addr_stream(struct mlx5dv_qp_ex *mqp, + struct ibv_ah *ah, + uint32_t remote_dctn, + uint64_t remote_dc_key, + uint16_t stream_id) +{ + mqp->wr_set_dc_addr_stream(mqp, ah, remote_dctn, + remote_dc_key, stream_id); +} + +static inline void mlx5dv_wr_mr_interleaved(struct mlx5dv_qp_ex *mqp, + struct mlx5dv_mkey *mkey, + uint32_t access_flags, + uint32_t repeat_count, + uint16_t num_interleaved, + struct mlx5dv_mr_interleaved *data) +{ + mqp->wr_mr_interleaved(mqp, mkey, access_flags, repeat_count, + num_interleaved, data); +} + +static inline void mlx5dv_wr_mr_list(struct mlx5dv_qp_ex *mqp, + struct mlx5dv_mkey *mkey, + uint32_t access_flags, + uint16_t num_sges, + struct ibv_sge *sge) +{ + mqp->wr_mr_list(mqp, mkey, access_flags, num_sges, sge); +} + +static inline void mlx5dv_wr_mkey_configure(struct mlx5dv_qp_ex *mqp, + struct mlx5dv_mkey *mkey, + uint8_t num_setters, + struct mlx5dv_mkey_conf_attr *attr) +{ + mqp->wr_mkey_configure(mqp, mkey, num_setters, attr); +} + +static inline void mlx5dv_wr_set_mkey_access_flags(struct mlx5dv_qp_ex *mqp, + uint32_t access_flags) +{ + mqp->wr_set_mkey_access_flags(mqp, access_flags); +} + +static inline void mlx5dv_wr_set_mkey_layout_list(struct mlx5dv_qp_ex *mqp, + uint16_t num_sges, + const struct ibv_sge *sge) +{ + mqp->wr_set_mkey_layout_list(mqp, num_sges, sge); +} + +static inline void mlx5dv_wr_set_mkey_layout_interleaved(struct mlx5dv_qp_ex *mqp, + uint32_t repeat_count, + uint16_t num_interleaved, + const struct mlx5dv_mr_interleaved *data) +{ + mqp->wr_set_mkey_layout_interleaved(mqp, repeat_count, + num_interleaved, data); +} + +static inline void mlx5dv_wr_set_mkey_sig_block(struct mlx5dv_qp_ex *mqp, + const struct mlx5dv_sig_block_attr *attr) +{ + mqp->wr_set_mkey_sig_block(mqp, attr); +} + +static inline void +mlx5dv_wr_set_mkey_crypto(struct mlx5dv_qp_ex *mqp, + const struct mlx5dv_crypto_attr *attr) +{ + mqp->wr_set_mkey_crypto(mqp, attr); +} + +static inline void mlx5dv_wr_memcpy(struct mlx5dv_qp_ex *mqp, + uint32_t dest_lkey, uint64_t dest_addr, + uint32_t src_lkey, uint64_t src_addr, + size_t length) +{ + mqp->wr_memcpy(mqp, dest_lkey, dest_addr, src_lkey, src_addr, length); +} + +enum mlx5dv_mkey_err_type { + MLX5DV_MKEY_NO_ERR, + MLX5DV_MKEY_SIG_BLOCK_BAD_GUARD, + MLX5DV_MKEY_SIG_BLOCK_BAD_REFTAG, + MLX5DV_MKEY_SIG_BLOCK_BAD_APPTAG, +}; + +struct mlx5dv_sig_err { + uint64_t actual_value; + uint64_t expected_value; + uint64_t offset; +}; + +struct mlx5dv_mkey_err { + enum mlx5dv_mkey_err_type err_type; + union { + struct mlx5dv_sig_err sig; + } err; +}; + +int _mlx5dv_mkey_check(struct mlx5dv_mkey *mkey, + struct mlx5dv_mkey_err *err_info, + size_t err_info_size); + +static inline int mlx5dv_mkey_check(struct mlx5dv_mkey *mkey, + struct mlx5dv_mkey_err *err_info) +{ + return _mlx5dv_mkey_check(mkey, err_info, sizeof(*err_info)); +} + +int mlx5dv_qp_cancel_posted_send_wrs(struct mlx5dv_qp_ex *mqp, uint64_t wr_id); + +static inline void mlx5dv_wr_raw_wqe(struct mlx5dv_qp_ex *mqp, const void *wqe) +{ + mqp->wr_raw_wqe(mqp, wqe); +} + +struct mlx5dv_crypto_login_obj; + +struct mlx5dv_crypto_login_attr { + uint32_t credential_id; + uint32_t import_kek_id; + char credential[48]; + uint64_t comp_mask; +}; + +struct mlx5dv_crypto_login_attr_ex { + uint32_t credential_id; + uint32_t import_kek_id; + const void *credential; + size_t credential_len; + uint64_t comp_mask; +}; +enum mlx5dv_crypto_login_state { + MLX5DV_CRYPTO_LOGIN_STATE_VALID, + MLX5DV_CRYPTO_LOGIN_STATE_NO_LOGIN, + MLX5DV_CRYPTO_LOGIN_STATE_INVALID, +}; + +struct mlx5dv_crypto_login_query_attr { + enum mlx5dv_crypto_login_state state; + uint64_t comp_mask; +}; + +int mlx5dv_crypto_login(struct ibv_context *context, + struct mlx5dv_crypto_login_attr *login_attr); + +int mlx5dv_crypto_login_query_state(struct ibv_context *context, + enum mlx5dv_crypto_login_state *state); + +int mlx5dv_crypto_logout(struct ibv_context *context); + +struct mlx5dv_crypto_login_obj * +mlx5dv_crypto_login_create(struct ibv_context *context, + struct mlx5dv_crypto_login_attr_ex *login_attr); + +int mlx5dv_crypto_login_query(struct mlx5dv_crypto_login_obj *crypto_login, + struct mlx5dv_crypto_login_query_attr *query_attr); + +int mlx5dv_crypto_login_destroy(struct mlx5dv_crypto_login_obj *crypto_login); + +enum mlx5dv_crypto_key_size { + MLX5DV_CRYPTO_KEY_SIZE_128, + MLX5DV_CRYPTO_KEY_SIZE_256, +}; + +enum mlx5dv_crypto_key_purpose { + MLX5DV_CRYPTO_KEY_PURPOSE_AES_XTS, +}; + +enum mlx5dv_dek_state { + MLX5DV_DEK_STATE_READY, + MLX5DV_DEK_STATE_ERROR, +}; + +enum mlx5dv_dek_init_attr_mask { + MLX5DV_DEK_INIT_ATTR_CRYPTO_LOGIN = 1 << 0, +}; + +struct mlx5dv_dek_init_attr { + enum mlx5dv_crypto_key_size key_size; + bool has_keytag; + enum mlx5dv_crypto_key_purpose key_purpose; + struct ibv_pd *pd; + char opaque[8]; + char key[128]; + uint64_t comp_mask; + struct mlx5dv_crypto_login_obj *crypto_login; +}; + +struct mlx5dv_dek_attr { + enum mlx5dv_dek_state state; + char opaque[8]; + uint64_t comp_mask; +}; + +struct mlx5dv_dek; + +struct mlx5dv_dek *mlx5dv_dek_create(struct ibv_context *context, + struct mlx5dv_dek_init_attr *init_attr); + +int mlx5dv_dek_query(struct mlx5dv_dek *dek, struct mlx5dv_dek_attr *attr); + +int mlx5dv_dek_destroy(struct mlx5dv_dek *dek); + +enum mlx5dv_flow_action_esp_mask { + MLX5DV_FLOW_ACTION_ESP_MASK_FLAGS = 1 << 0, +}; + +struct mlx5dv_flow_action_esp { + uint64_t comp_mask; /* Use enum mlx5dv_flow_action_esp_mask */ + uint32_t action_flags; /* Use enum mlx5dv_flow_action_flags */ +}; + +struct mlx5dv_flow_match_parameters { + size_t match_sz; + uint64_t match_buf[]; /* Device spec format */ +}; + +enum mlx5dv_flow_matcher_attr_mask { + MLX5DV_FLOW_MATCHER_MASK_FT_TYPE = 1 << 0, +}; + +struct mlx5dv_flow_matcher_attr { + enum ibv_flow_attr_type type; + uint32_t flags; /* From enum ibv_flow_flags */ + uint16_t priority; + uint8_t match_criteria_enable; /* Device spec format */ + struct mlx5dv_flow_match_parameters *match_mask; + uint64_t comp_mask; /* use mlx5dv_flow_matcher_attr_mask */ + enum mlx5dv_flow_table_type ft_type; +}; + +struct mlx5dv_flow_matcher; + +struct mlx5dv_flow_matcher * +mlx5dv_create_flow_matcher(struct ibv_context *context, + struct mlx5dv_flow_matcher_attr *matcher_attr); + +int mlx5dv_destroy_flow_matcher(struct mlx5dv_flow_matcher *matcher); + +struct mlx5dv_steering_anchor_attr { + enum mlx5dv_flow_table_type ft_type; + uint16_t priority; + uint64_t comp_mask; +}; + +struct mlx5dv_steering_anchor { + uint32_t id; +}; + +struct mlx5dv_steering_anchor * +mlx5dv_create_steering_anchor(struct ibv_context *context, + struct mlx5dv_steering_anchor_attr *attr); +int mlx5dv_destroy_steering_anchor(struct mlx5dv_steering_anchor *sa); + +enum mlx5dv_flow_action_type { + MLX5DV_FLOW_ACTION_DEST_IBV_QP, + MLX5DV_FLOW_ACTION_DROP, + MLX5DV_FLOW_ACTION_IBV_COUNTER, + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION, + MLX5DV_FLOW_ACTION_TAG, + MLX5DV_FLOW_ACTION_DEST_DEVX, + MLX5DV_FLOW_ACTION_COUNTERS_DEVX, + MLX5DV_FLOW_ACTION_DEFAULT_MISS, +}; + +struct mlx5dv_flow_action_attr { + enum mlx5dv_flow_action_type type; + union { + struct ibv_qp *qp; + struct ibv_counters *counter; + struct ibv_flow_action *action; + uint32_t tag_value; + struct mlx5dv_devx_obj *obj; + }; +}; + +struct ibv_flow * +mlx5dv_create_flow(struct mlx5dv_flow_matcher *matcher, + struct mlx5dv_flow_match_parameters *match_value, + size_t num_actions, + struct mlx5dv_flow_action_attr actions_attr[]); + +struct ibv_flow_action *mlx5dv_create_flow_action_esp(struct ibv_context *ctx, + struct ibv_flow_action_esp_attr *esp, + struct mlx5dv_flow_action_esp *mlx5_attr); + +/* + * mlx5dv_create_flow_action_modify_header - Create a flow action which mutates + * a packet. The flow action can be attached to steering rules via + * ibv_create_flow(). + * + * @ctx: RDMA device context to create the action on. + * @actions_sz: The size of *actions* buffer in bytes. + * @actions: A buffer which contains modify actions provided in device spec + * format. + * @ft_type: Defines the flow table type to which the modify + * header action will be attached. + * + * Return a valid ibv_flow_action if successful, NULL otherwise. + */ +struct ibv_flow_action * +mlx5dv_create_flow_action_modify_header(struct ibv_context *ctx, + size_t actions_sz, + uint64_t actions[], + enum mlx5dv_flow_table_type ft_type); + +/* + * mlx5dv_create_flow_action_packet_reformat - Create flow action which can + * encap/decap packets. + */ +struct ibv_flow_action * +mlx5dv_create_flow_action_packet_reformat(struct ibv_context *ctx, + size_t data_sz, + void *data, + enum mlx5dv_flow_action_packet_reformat_type reformat_type, + enum mlx5dv_flow_table_type ft_type); +/* + * Most device capabilities are exported by ibv_query_device(...), + * but there is HW device-specific information which is important + * for data-path, but isn't provided. + * + * Return 0 on success. + */ +int mlx5dv_query_device(struct ibv_context *ctx_in, + struct mlx5dv_context *attrs_out); + +int mlx5dv_map_ah_to_qp(struct ibv_ah *ah, uint32_t qp_num); + +enum mlx5dv_qp_comp_mask { + MLX5DV_QP_MASK_UAR_MMAP_OFFSET = 1 << 0, + MLX5DV_QP_MASK_RAW_QP_HANDLES = 1 << 1, + MLX5DV_QP_MASK_RAW_QP_TIR_ADDR = 1 << 2, +}; + +struct mlx5dv_qp { + __be32 *dbrec; + struct { + void *buf; + uint32_t wqe_cnt; + uint32_t stride; + } sq; + struct { + void *buf; + uint32_t wqe_cnt; + uint32_t stride; + } rq; + struct { + void *reg; + uint32_t size; + } bf; + uint64_t comp_mask; + off_t uar_mmap_offset; + uint32_t tirn; + uint32_t tisn; + uint32_t rqn; + uint32_t sqn; + uint64_t tir_icm_addr; +}; + +struct mlx5dv_cq { + void *buf; + __be32 *dbrec; + uint32_t cqe_cnt; + uint32_t cqe_size; + void *cq_uar; + uint32_t cqn; + uint64_t comp_mask; +}; + +enum mlx5dv_srq_comp_mask { + MLX5DV_SRQ_MASK_SRQN = 1 << 0, +}; + +struct mlx5dv_srq { + void *buf; + __be32 *dbrec; + uint32_t stride; + uint32_t head; + uint32_t tail; + uint64_t comp_mask; + uint32_t srqn; +}; + +struct mlx5dv_rwq { + void *buf; + __be32 *dbrec; + uint32_t wqe_cnt; + uint32_t stride; + uint64_t comp_mask; +}; + +struct mlx5dv_alloc_dm_attr { + enum mlx5dv_alloc_dm_type type; + uint64_t comp_mask; +}; + +enum mlx5dv_dm_comp_mask { + MLX5DV_DM_MASK_REMOTE_VA = 1 << 0, +}; + +struct mlx5dv_dm { + void *buf; + uint64_t length; + uint64_t comp_mask; + uint64_t remote_va; +}; + +struct ibv_dm *mlx5dv_alloc_dm(struct ibv_context *context, + struct ibv_alloc_dm_attr *dm_attr, + struct mlx5dv_alloc_dm_attr *mlx5_dm_attr); + +void *mlx5dv_dm_map_op_addr(struct ibv_dm *dm, uint8_t op); + +struct mlx5_wqe_av; + +struct mlx5dv_ah { + struct mlx5_wqe_av *av; + uint64_t comp_mask; +}; + +struct mlx5dv_pd { + uint32_t pdn; + uint64_t comp_mask; +}; + +struct mlx5dv_obj { + struct { + struct ibv_qp *in; + struct mlx5dv_qp *out; + } qp; + struct { + struct ibv_cq *in; + struct mlx5dv_cq *out; + } cq; + struct { + struct ibv_srq *in; + struct mlx5dv_srq *out; + } srq; + struct { + struct ibv_wq *in; + struct mlx5dv_rwq *out; + } rwq; + struct { + struct ibv_dm *in; + struct mlx5dv_dm *out; + } dm; + struct { + struct ibv_ah *in; + struct mlx5dv_ah *out; + } ah; + struct { + struct ibv_pd *in; + struct mlx5dv_pd *out; + } pd; +}; + +enum mlx5dv_obj_type { + MLX5DV_OBJ_QP = 1 << 0, + MLX5DV_OBJ_CQ = 1 << 1, + MLX5DV_OBJ_SRQ = 1 << 2, + MLX5DV_OBJ_RWQ = 1 << 3, + MLX5DV_OBJ_DM = 1 << 4, + MLX5DV_OBJ_AH = 1 << 5, + MLX5DV_OBJ_PD = 1 << 6, +}; + +enum mlx5dv_wq_init_attr_mask { + MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ = 1 << 0, +}; + +struct mlx5dv_striding_rq_init_attr { + uint32_t single_stride_log_num_of_bytes; + uint32_t single_wqe_log_num_of_strides; + uint8_t two_byte_shift_en; +}; + +struct mlx5dv_wq_init_attr { + uint64_t comp_mask; /* Use enum mlx5dv_wq_init_attr_mask */ + struct mlx5dv_striding_rq_init_attr striding_rq_attrs; +}; + +/* + * This function creates a work queue object with extra properties + * defined by mlx5dv_wq_init_attr struct. + * + * For each bit in the comp_mask, a field in mlx5dv_wq_init_attr + * should follow. + * + * MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ: Create a work queue with + * striding RQ capabilities. + * - single_stride_log_num_of_bytes represents the size of each stride in the + * WQE and its value should be between min_single_stride_log_num_of_bytes + * and max_single_stride_log_num_of_bytes that are reported in + * mlx5dv_query_device. + * - single_wqe_log_num_of_strides represents the number of strides in each WQE. + * Its value should be between min_single_wqe_log_num_of_strides and + * max_single_wqe_log_num_of_strides that are reported in mlx5dv_query_device. + * - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeroes + * before writing the message to memory (e.g. for IP alignment) + */ +struct ibv_wq *mlx5dv_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *wq_init_attr, + struct mlx5dv_wq_init_attr *mlx5_wq_attr); +/* + * This function will initialize mlx5dv_xxx structs based on supplied type. + * The information for initialization is taken from ibv_xx structs supplied + * as part of input. + * + * Request information of CQ marks its owned by DV for all consumer index + * related actions. + * + * The initialization type can be combination of several types together. + * + * Return: 0 in case of success. + */ +int mlx5dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type); + +enum { + MLX5_OPCODE_NOP = 0x00, + MLX5_OPCODE_SEND_INVAL = 0x01, + MLX5_OPCODE_RDMA_WRITE = 0x08, + MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX5_OPCODE_SEND = 0x0a, + MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_TSO = 0x0e, + MLX5_OPCODE_RDMA_READ = 0x10, + MLX5_OPCODE_ATOMIC_CS = 0x11, + MLX5_OPCODE_ATOMIC_FA = 0x12, + MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, + MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, + MLX5_OPCODE_FMR = 0x19, + MLX5_OPCODE_LOCAL_INVAL = 0x1b, + MLX5_OPCODE_CONFIG_CMD = 0x1f, + MLX5_OPCODE_SET_PSV = 0x20, + MLX5_OPCODE_UMR = 0x25, + MLX5_OPCODE_TAG_MATCHING = 0x28, + MLX5_OPCODE_FLOW_TBL_ACCESS = 0x2c, + MLX5_OPCODE_MMO = 0x2F, +}; + +/* + * CQE related part + */ + +enum { + MLX5_INLINE_SCATTER_32 = 0x4, + MLX5_INLINE_SCATTER_64 = 0x8, +}; + +enum { + MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX5_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX5_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX5_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX5_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX5_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +enum { + MLX5_CQE_VENDOR_SYNDROME_ODP_PFAULT = 0x93, +}; + +enum { + MLX5_CQE_L2_OK = 1 << 0, + MLX5_CQE_L3_OK = 1 << 1, + MLX5_CQE_L4_OK = 1 << 2, +}; + +enum { + MLX5_CQE_L3_HDR_TYPE_NONE = 0x0, + MLX5_CQE_L3_HDR_TYPE_IPV6 = 0x1, + MLX5_CQE_L3_HDR_TYPE_IPV4 = 0x2, +}; + +enum { + MLX5_CQE_OWNER_MASK = 1, + MLX5_CQE_REQ = 0, + MLX5_CQE_RESP_WR_IMM = 1, + MLX5_CQE_RESP_SEND = 2, + MLX5_CQE_RESP_SEND_IMM = 3, + MLX5_CQE_RESP_SEND_INV = 4, + MLX5_CQE_RESIZE_CQ = 5, + MLX5_CQE_NO_PACKET = 6, + MLX5_CQE_SIG_ERR = 12, + MLX5_CQE_REQ_ERR = 13, + MLX5_CQE_RESP_ERR = 14, + MLX5_CQE_INVALID = 15, +}; + +enum { + MLX5_CQ_DOORBELL = 0x20 +}; + +enum { + MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24, + MLX5_CQ_DB_REQ_NOT = 0 << 24, +}; + +struct mlx5_err_cqe { + uint8_t rsvd0[32]; + uint32_t srqn; + uint8_t rsvd1[18]; + uint8_t vendor_err_synd; + uint8_t syndrome; + uint32_t s_wqe_opcode_qpn; + uint16_t wqe_counter; + uint8_t signature; + uint8_t op_own; +}; + +struct mlx5_tm_cqe { + __be32 success; + __be16 hw_phase_cnt; + uint8_t rsvd0[12]; +}; + +struct mlx5_cqe64 { + union { + struct { + uint8_t rsvd0[2]; + __be16 wqe_id; + uint8_t rsvd4[13]; + uint8_t ml_path; + uint8_t rsvd20[4]; + __be16 slid; + __be32 flags_rqpn; + uint8_t hds_ip_ext; + uint8_t l4_hdr_type_etc; + __be16 vlan_info; + }; + struct mlx5_tm_cqe tm_cqe; + /* TMH is scattered to CQE upon match */ + struct ibv_tmh tmh; + }; + __be32 srqn_uidx; + __be32 imm_inval_pkey; + uint8_t app; + uint8_t app_op; + __be16 app_info; + __be32 byte_cnt; + __be64 timestamp; + __be32 sop_drop_qpn; + __be16 wqe_counter; + uint8_t signature; + uint8_t op_own; +}; + +enum { + MLX5_TMC_SUCCESS = 0x80000000U, +}; + +enum mlx5dv_cqe_comp_res_format { + MLX5DV_CQE_RES_FORMAT_HASH = 1 << 0, + MLX5DV_CQE_RES_FORMAT_CSUM = 1 << 1, + MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX = 1 << 2, +}; + +enum mlx5dv_sw_parsing_offloads { + MLX5DV_SW_PARSING = 1 << 0, + MLX5DV_SW_PARSING_CSUM = 1 << 1, + MLX5DV_SW_PARSING_LSO = 1 << 2, +}; + +static MLX5DV_ALWAYS_INLINE +uint8_t mlx5dv_get_cqe_owner(struct mlx5_cqe64 *cqe) +{ + return cqe->op_own & 0x1; +} + +static MLX5DV_ALWAYS_INLINE +void mlx5dv_set_cqe_owner(struct mlx5_cqe64 *cqe, uint8_t val) +{ + cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1); +} + +/* Solicited event */ +static MLX5DV_ALWAYS_INLINE +uint8_t mlx5dv_get_cqe_se(struct mlx5_cqe64 *cqe) +{ + return (cqe->op_own >> 1) & 0x1; +} + +static MLX5DV_ALWAYS_INLINE +uint8_t mlx5dv_get_cqe_format(struct mlx5_cqe64 *cqe) +{ + return (cqe->op_own >> 2) & 0x3; +} + +static MLX5DV_ALWAYS_INLINE +uint8_t mlx5dv_get_cqe_opcode(struct mlx5_cqe64 *cqe) +{ + return cqe->op_own >> 4; +} + +/* + * WQE related part + */ +enum { + MLX5_INVALID_LKEY = 0x100, +}; + +enum { + MLX5_EXTENDED_UD_AV = 0x80000000, +}; + +enum { + MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_SOLICITED = 1 << 1, + MLX5_WQE_CTRL_FENCE = 4 << 5, + MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE = 1 << 5, +}; + +enum { + MLX5_SEND_WQE_BB = 64, + MLX5_SEND_WQE_SHIFT = 6, +}; + +enum { + MLX5_INLINE_SEG = 0x80000000, +}; + +enum { + MLX5_ETH_WQE_L3_CSUM = (1 << 6), + MLX5_ETH_WQE_L4_CSUM = (1 << 7), +}; + +struct mlx5_wqe_srq_next_seg { + uint8_t rsvd0[2]; + __be16 next_wqe_index; + uint8_t signature; + uint8_t rsvd1[11]; +}; + +struct mlx5_wqe_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mlx5_wqe_ctrl_seg { + __be32 opmod_idx_opcode; + __be32 qpn_ds; + uint8_t signature; + __be16 dci_stream_channel_id; + uint8_t fm_ce_se; + __be32 imm; +} __attribute__((__packed__)) __attribute__((__aligned__(4))); + +struct mlx5_mprq_wqe { + struct mlx5_wqe_srq_next_seg nseg; + struct mlx5_wqe_data_seg dseg; +}; + +struct mlx5_wqe_av { + union { + struct { + __be32 qkey; + __be32 reserved; + } qkey; + __be64 dc_key; + } key; + __be32 dqp_dct; + uint8_t stat_rate_sl; + uint8_t fl_mlid; + __be16 rlid; + uint8_t reserved0[4]; + uint8_t rmac[6]; + uint8_t tclass; + uint8_t hop_limit; + __be32 grh_gid_fl; + uint8_t rgid[16]; +}; + +struct mlx5_wqe_datagram_seg { + struct mlx5_wqe_av av; +}; + +struct mlx5_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + __be32 reserved; +}; + +struct mlx5_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mlx5_wqe_inl_data_seg { + uint32_t byte_count; +}; + +struct mlx5_wqe_eth_seg { + __be32 rsvd0; + uint8_t cs_flags; + uint8_t rsvd1; + __be16 mss; + __be32 rsvd2; + __be16 inline_hdr_sz; + uint8_t inline_hdr_start[2]; + uint8_t inline_hdr[16]; +}; + +struct mlx5_wqe_tm_seg { + uint8_t opcode; + uint8_t flags; + __be16 index; + uint8_t rsvd0[2]; + __be16 sw_cnt; + uint8_t rsvd1[8]; + __be64 append_tag; + __be64 append_mask; +}; + +enum { + MLX5_WQE_UMR_CTRL_FLAG_INLINE = 1 << 7, + MLX5_WQE_UMR_CTRL_FLAG_CHECK_FREE = 1 << 5, + MLX5_WQE_UMR_CTRL_FLAG_TRNSLATION_OFFSET = 1 << 4, + MLX5_WQE_UMR_CTRL_FLAG_CHECK_QPN = 1 << 3, +}; + +enum { + MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN = 1 << 0, + MLX5_WQE_UMR_CTRL_MKEY_MASK_START_ADDR = 1 << 6, + MLX5_WQE_UMR_CTRL_MKEY_MASK_SIG_ERR = 1 << 9, + MLX5_WQE_UMR_CTRL_MKEY_MASK_BSF_ENABLE = 1 << 12, + MLX5_WQE_UMR_CTRL_MKEY_MASK_MKEY = 1 << 13, + MLX5_WQE_UMR_CTRL_MKEY_MASK_QPN = 1 << 14, + MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE = 1 << 18, + MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_REMOTE_READ = 1 << 19, + MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_REMOTE_WRITE = 1 << 20, + MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_ATOMIC = 1 << 21, + MLX5_WQE_UMR_CTRL_MKEY_MASK_FREE = 1 << 29, +}; + +struct mlx5_wqe_umr_ctrl_seg { + uint8_t flags; + uint8_t rsvd0[3]; + __be16 klm_octowords; + union { + __be16 translation_offset; + __be16 bsf_octowords; + }; + __be64 mkey_mask; + uint8_t rsvd1[32]; +}; + +struct mlx5_wqe_umr_klm_seg { + /* up to 2GB */ + __be32 byte_count; + __be32 mkey; + __be64 address; +}; + +union mlx5_wqe_umr_inline_seg { + struct mlx5_wqe_umr_klm_seg klm; +}; + +struct mlx5_wqe_umr_repeat_ent_seg { + __be16 stride; + __be16 byte_count; + __be32 memkey; + __be64 va; +}; + +struct mlx5_wqe_umr_repeat_block_seg { + __be32 byte_count; + __be32 op; + __be32 repeat_count; + __be16 reserved; + __be16 num_ent; + struct mlx5_wqe_umr_repeat_ent_seg entries[0]; +}; + +enum { + MLX5_WQE_MKEY_CONTEXT_FREE = 1 << 6 +}; + +enum { + MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_ATOMIC = 1 << 6, + MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_REMOTE_WRITE = 1 << 5, + MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_REMOTE_READ = 1 << 4, + MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_LOCAL_WRITE = 1 << 3, + MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_LOCAL_READ = 1 << 2 +}; + +struct mlx5_wqe_mkey_context_seg { + uint8_t free; + uint8_t reserved1; + uint8_t access_flags; + uint8_t sf; + __be32 qpn_mkey; + __be32 reserved2; + __be32 flags_pd; + __be64 start_addr; + __be64 len; + __be32 bsf_octword_size; + __be32 reserved3[4]; + __be32 translations_octword_size; + uint8_t reserved4[3]; + uint8_t log_page_size; + __be32 reserved; + union mlx5_wqe_umr_inline_seg inseg[0]; +}; + +/* + * Control segment - contains some control information for the current WQE. + * + * Output: + * seg - control segment to be filled + * Input: + * pi - WQEBB number of the first block of this WQE. + * This number should wrap at 0xffff, regardless of + * size of the WQ. + * opcode - Opcode of this WQE. Encodes the type of operation + * to be executed on the QP. + * opmod - Opcode modifier. + * qp_num - QP/SQ number this WQE is posted to. + * fm_ce_se - FM (fence mode), CE (completion and event mode) + * and SE (solicited event). + * ds - WQE size in octowords (16-byte units). DS accounts for all + * the segments in the WQE as summarized in WQE construction. + * signature - WQE signature. + * imm - Immediate data/Invalidation key/UMR mkey. + */ +static MLX5DV_ALWAYS_INLINE +void mlx5dv_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, + uint8_t opcode, uint8_t opmod, uint32_t qp_num, + uint8_t fm_ce_se, uint8_t ds, + uint8_t signature, uint32_t imm) +{ + seg->opmod_idx_opcode = htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode); + seg->qpn_ds = htobe32((qp_num << 8) | ds); + seg->fm_ce_se = fm_ce_se; + seg->signature = signature; + /* + * The caller should prepare "imm" in advance based on WR opcode. + * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM, + * the "imm" should be assigned as is. + * For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm). + */ + seg->imm = imm; +} + +/* x86 optimized version of mlx5dv_set_ctrl_seg() + * + * This is useful when doing calculations on large data sets + * for parallel calculations. + * + * It doesn't suit for serialized algorithms. + */ +#if defined(__SSE3__) +static MLX5DV_ALWAYS_INLINE +void mlx5dv_x86_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, + uint8_t opcode, uint8_t opmod, uint32_t qp_num, + uint8_t fm_ce_se, uint8_t ds, + uint8_t signature, uint32_t imm) +{ + __m128i val = _mm_set_epi32(imm, qp_num, (ds << 16) | pi, + (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se); + __m128i mask = _mm_set_epi8(15, 14, 13, 12, /* immediate */ + 0, /* signal/fence_mode */ +#if CHAR_MIN + -128, -128, /* reserved */ +#else + 0x80, 0x80, /* reserved */ +#endif + 3, /* signature */ + 6, /* data size */ + 8, 9, 10, /* QP num */ + 2, /* opcode */ + 4, 5, /* sw_pi in BE */ + 1 /* opmod */ + ); + *(__m128i *) seg = _mm_shuffle_epi8(val, mask); +} +#endif /* defined(__SSE3__) */ + +/* + * Datagram Segment - contains address information required in order + * to form a datagram message. + * + * Output: + * seg - datagram segment to be filled. + * Input: + * key - Q_key/access key. + * dqp_dct - Destination QP number for UD and DCT for DC. + * ext - Address vector extension. + * stat_rate_sl - Maximum static rate control, SL/ethernet priority. + * fl_mlid - Force loopback and source LID for IB. + * rlid - Remote LID + * rmac - Remote MAC + * tclass - GRH tclass/IPv6 tclass/IPv4 ToS + * hop_limit - GRH hop limit/IPv6 hop limit/IPv4 TTL + * grh_gid_fi - GRH, source GID address and IPv6 flow label. + * rgid - Remote GID/IP address. + */ +static MLX5DV_ALWAYS_INLINE +void mlx5dv_set_dgram_seg(struct mlx5_wqe_datagram_seg *seg, + uint64_t key, uint32_t dqp_dct, + uint8_t ext, uint8_t stat_rate_sl, + uint8_t fl_mlid, uint16_t rlid, + uint8_t *rmac, uint8_t tclass, + uint8_t hop_limit, uint32_t grh_gid_fi, + uint8_t *rgid) +{ + + /* Always put 64 bits, in q_key, the reserved part will be 0 */ + seg->av.key.dc_key = htobe64(key); + seg->av.dqp_dct = htobe32(((uint32_t)ext << 31) | dqp_dct); + seg->av.stat_rate_sl = stat_rate_sl; + seg->av.fl_mlid = fl_mlid; + seg->av.rlid = htobe16(rlid); + memcpy(seg->av.rmac, rmac, 6); + seg->av.tclass = tclass; + seg->av.hop_limit = hop_limit; + seg->av.grh_gid_fl = htobe32(grh_gid_fi); + memcpy(seg->av.rgid, rgid, 16); +} + +/* + * Data Segments - contain pointers and a byte count for the scatter/gather list. + * They can optionally contain data, which will save a memory read access for + * gather Work Requests. + */ +static MLX5DV_ALWAYS_INLINE +void mlx5dv_set_data_seg(struct mlx5_wqe_data_seg *seg, + uint32_t length, uint32_t lkey, + uintptr_t address) +{ + seg->byte_count = htobe32(length); + seg->lkey = htobe32(lkey); + seg->addr = htobe64(address); +} +/* + * x86 optimized version of mlx5dv_set_data_seg() + * + * This is useful when doing calculations on large data sets + * for parallel calculations. + * + * It doesn't suit for serialized algorithms. + */ +#if defined(__SSE3__) +static MLX5DV_ALWAYS_INLINE +void mlx5dv_x86_set_data_seg(struct mlx5_wqe_data_seg *seg, + uint32_t length, uint32_t lkey, + uintptr_t address) +{ + + uint64_t address64 = address; + __m128i val = _mm_set_epi32((uint32_t)address64, (uint32_t)(address64 >> 32), lkey, length); + __m128i mask = _mm_set_epi8(12, 13, 14, 15, /* local address low */ + 8, 9, 10, 11, /* local address high */ + 4, 5, 6, 7, /* l_key */ + 0, 1, 2, 3 /* byte count */ + ); + *(__m128i *) seg = _mm_shuffle_epi8(val, mask); +} +#endif /* defined(__SSE3__) */ + +/* + * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading. + * + * Output: + * seg - Eth segment to be filled. + * Input: + * cs_flags - l3cs/l3cs_inner/l4cs/l4cs_inner. + * mss - Maximum segment size. For TSO WQEs, the number of bytes + * in the TCP payload to be transmitted in each packet. Must + * be 0 on non TSO WQEs. + * inline_hdr_sz - Length of the inlined packet headers. + * inline_hdr_start - Inlined packet header. + */ +static MLX5DV_ALWAYS_INLINE +void mlx5dv_set_eth_seg(struct mlx5_wqe_eth_seg *seg, uint8_t cs_flags, + uint16_t mss, uint16_t inline_hdr_sz, + uint8_t *inline_hdr_start) +{ + seg->cs_flags = cs_flags; + seg->mss = htobe16(mss); + seg->inline_hdr_sz = htobe16(inline_hdr_sz); + memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz); +} + +enum mlx5dv_set_ctx_attr_type { + MLX5DV_CTX_ATTR_BUF_ALLOCATORS = 1, +}; + +enum { + MLX5_MMAP_GET_REGULAR_PAGES_CMD = 0, + MLX5_MMAP_GET_NC_PAGES_CMD = 3, +}; + +struct mlx5dv_ctx_allocators { + void *(*alloc)(size_t size, void *priv_data); + void (*free)(void *ptr, void *priv_data); + void *data; +}; + +/* + * Generic context attributes set API + * + * Returns 0 on success, or the value of errno on failure + * (which indicates the failure reason). + */ +int mlx5dv_set_context_attr(struct ibv_context *context, + enum mlx5dv_set_ctx_attr_type type, void *attr); + +struct mlx5dv_clock_info { + uint64_t nsec; + uint64_t last_cycles; + uint64_t frac; + uint32_t mult; + uint32_t shift; + uint64_t mask; +}; + +/* + * Get mlx5 core clock info + * + * Output: + * clock_info - clock info to be filled + * Input: + * context - device context + * + * Return: 0 on success, or the value of errno on failure + */ +int mlx5dv_get_clock_info(struct ibv_context *context, + struct mlx5dv_clock_info *clock_info); + +/* + * Translate device timestamp to nano-sec + * + * Input: + * clock_info - clock info to be filled + * device_timestamp - timestamp to translate + * + * Return: nano-sec + */ +static inline uint64_t mlx5dv_ts_to_ns(struct mlx5dv_clock_info *clock_info, + uint64_t device_timestamp) +{ + uint64_t delta, nsec; + + /* + * device_timestamp & cycles are the free running 'mask' bit counters + * from the hardware hca_core_clock clock. + */ + delta = (device_timestamp - clock_info->last_cycles) & clock_info->mask; + nsec = clock_info->nsec; + + /* + * Guess if the device_timestamp is more recent than + * clock_info->last_cycles, if not (too far in the future) treat + * it as old time stamp. This will break every max_clock_info_update_nsec. + */ + + if (delta > clock_info->mask / 2) { + delta = (clock_info->last_cycles - device_timestamp) & + clock_info->mask; + nsec -= ((delta * clock_info->mult) - clock_info->frac) >> + clock_info->shift; + } else { + nsec += ((delta * clock_info->mult) + clock_info->frac) >> + clock_info->shift; + } + + return nsec; +} + +enum mlx5dv_context_attr_flags { + MLX5DV_CONTEXT_FLAGS_DEVX = 1 << 0, +}; + +struct mlx5dv_context_attr { + uint32_t flags; /* Use enum mlx5dv_context_attr_flags */ + uint64_t comp_mask; +}; + +bool mlx5dv_is_supported(struct ibv_device *device); + +enum mlx5dv_vfio_context_attr_flags { + MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN = 1 << 0, +}; + +struct mlx5dv_vfio_context_attr { + const char *pci_name; + uint32_t flags; /* Use enum mlx5dv_vfio_context_attr_flags */ + uint64_t comp_mask; +}; + +struct ibv_device ** +mlx5dv_get_vfio_device_list(struct mlx5dv_vfio_context_attr *attr); + +int mlx5dv_vfio_get_events_fd(struct ibv_context *ibctx); + +/* This API should run from application thread and maintain device events. + * The application is responsible to get the events FD by calling mlx5dv_vfio_get_events_fd + * and once the FD is pollable call the API to let driver process the ready events. + */ +int mlx5dv_vfio_process_events(struct ibv_context *context); + +struct ibv_context * +mlx5dv_open_device(struct ibv_device *device, struct mlx5dv_context_attr *attr); + +struct mlx5dv_devx_obj; + +struct mlx5dv_devx_obj * +mlx5dv_devx_obj_create(struct ibv_context *context, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_obj_query(struct mlx5dv_devx_obj *obj, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_obj_modify(struct mlx5dv_devx_obj *obj, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_obj_destroy(struct mlx5dv_devx_obj *obj); +int mlx5dv_devx_general_cmd(struct ibv_context *context, const void *in, size_t inlen, + void *out, size_t outlen); + +int _mlx5dv_query_port(struct ibv_context *context, + uint32_t port_num, + struct mlx5dv_port *info, + size_t info_len); + +static inline int mlx5dv_query_port(struct ibv_context *context, + uint32_t port_num, + struct mlx5dv_port *info) +{ + return _mlx5dv_query_port(context, port_num, info, sizeof(*info)); +} + +struct mlx5dv_devx_umem { + uint32_t umem_id; +}; + +struct mlx5dv_devx_umem * +mlx5dv_devx_umem_reg(struct ibv_context *ctx, void *addr, size_t size, uint32_t access); + +enum mlx5dv_devx_umem_in_mask { + MLX5DV_UMEM_MASK_DMABUF = 1 << 0, +}; + +struct mlx5dv_devx_umem_in { + void *addr; + size_t size; + uint32_t access; + uint64_t pgsz_bitmap; + uint64_t comp_mask; + int dmabuf_fd; +}; + +struct mlx5dv_devx_umem * +mlx5dv_devx_umem_reg_ex(struct ibv_context *ctx, struct mlx5dv_devx_umem_in *umem_in); + +int mlx5dv_devx_umem_dereg(struct mlx5dv_devx_umem *umem); + +struct mlx5dv_devx_uar { + void *reg_addr; + void *base_addr; + uint32_t page_id; + off_t mmap_off; + uint64_t comp_mask; +}; + +struct mlx5dv_devx_uar *mlx5dv_devx_alloc_uar(struct ibv_context *context, + uint32_t flags); +void mlx5dv_devx_free_uar(struct mlx5dv_devx_uar *devx_uar); + + +struct mlx5dv_var { + uint32_t page_id; + uint32_t length; + off_t mmap_off; + uint64_t comp_mask; +}; + +struct mlx5dv_var * +mlx5dv_alloc_var(struct ibv_context *context, uint32_t flags); +void mlx5dv_free_var(struct mlx5dv_var *dv_var); + +int mlx5dv_devx_query_eqn(struct ibv_context *context, uint32_t vector, + uint32_t *eqn); + +int mlx5dv_devx_cq_query(struct ibv_cq *cq, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_cq_modify(struct ibv_cq *cq, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_qp_query(struct ibv_qp *qp, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_qp_modify(struct ibv_qp *qp, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_srq_query(struct ibv_srq *srq, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_srq_modify(struct ibv_srq *srq, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_wq_query(struct ibv_wq *wq, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_wq_modify(struct ibv_wq *wq, const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_ind_tbl_query(struct ibv_rwq_ind_table *ind_tbl, + const void *in, size_t inlen, + void *out, size_t outlen); +int mlx5dv_devx_ind_tbl_modify(struct ibv_rwq_ind_table *ind_tbl, + const void *in, size_t inlen, + void *out, size_t outlen); + +struct mlx5dv_devx_cmd_comp { + int fd; +}; + +struct mlx5dv_devx_cmd_comp * +mlx5dv_devx_create_cmd_comp(struct ibv_context *context); +void mlx5dv_devx_destroy_cmd_comp(struct mlx5dv_devx_cmd_comp *cmd_comp); +int mlx5dv_devx_obj_query_async(struct mlx5dv_devx_obj *obj, const void *in, + size_t inlen, size_t outlen, + uint64_t wr_id, + struct mlx5dv_devx_cmd_comp *cmd_comp); + +int mlx5dv_devx_get_async_cmd_comp(struct mlx5dv_devx_cmd_comp *cmd_comp, + struct mlx5dv_devx_async_cmd_hdr *cmd_resp, + size_t cmd_resp_len); + +struct mlx5dv_devx_event_channel { + int fd; +}; + +struct mlx5dv_devx_event_channel * +mlx5dv_devx_create_event_channel(struct ibv_context *context, + enum mlx5dv_devx_create_event_channel_flags flags); +void mlx5dv_devx_destroy_event_channel(struct mlx5dv_devx_event_channel *event_channel); + + +int mlx5dv_devx_subscribe_devx_event(struct mlx5dv_devx_event_channel *event_channel, + struct mlx5dv_devx_obj *obj, /* can be NULL for unaffiliated events */ + uint16_t events_sz, + uint16_t events_num[], + uint64_t cookie); + +int mlx5dv_devx_subscribe_devx_event_fd(struct mlx5dv_devx_event_channel *event_channel, + int fd, + struct mlx5dv_devx_obj *obj, /* can be NULL for unaffiliated events */ + uint16_t event_num); + +/* return code: upon success number of bytes read, otherwise -1 and errno was set */ +ssize_t mlx5dv_devx_get_event(struct mlx5dv_devx_event_channel *event_channel, + struct mlx5dv_devx_async_event_hdr *event_data, + size_t event_resp_len); + + +#define __devx_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)NULL) +#define __devx_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits) +#define __devx_bit_sz(typ, fld) sizeof(__devx_nullp(typ)->fld) +#define __devx_bit_off(typ, fld) offsetof(struct mlx5_ifc_##typ##_bits, fld) +#define __devx_dw_off(bit_off) ((bit_off) / 32) +#define __devx_64_off(bit_off) ((bit_off) / 64) +#define __devx_dw_bit_off(bit_sz, bit_off) (32 - (bit_sz) - ((bit_off) & 0x1f)) +#define __devx_mask(bit_sz) ((uint32_t)((1ull << (bit_sz)) - 1)) +#define __devx_dw_mask(bit_sz, bit_off) \ + (__devx_mask(bit_sz) << __devx_dw_bit_off(bit_sz, bit_off)) + +#define DEVX_FLD_SZ_BYTES(typ, fld) (__devx_bit_sz(typ, fld) / 8) +#define DEVX_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) +#define DEVX_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) +#define DEVX_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) +#define DEVX_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) +#define DEVX_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) +#define DEVX_BYTE_OFF(typ, fld) (__devx_bit_off(typ, fld) / 8) +#define DEVX_ADDR_OF(typ, p, fld) \ + ((unsigned char *)(p) + DEVX_BYTE_OFF(typ, fld)) + +static inline void _devx_set(void *p, uint32_t value, size_t bit_off, + size_t bit_sz) +{ + __be32 *fld = (__be32 *)(p) + __devx_dw_off(bit_off); + uint32_t dw_mask = __devx_dw_mask(bit_sz, bit_off); + uint32_t mask = __devx_mask(bit_sz); + + *fld = htobe32((be32toh(*fld) & (~dw_mask)) | + ((value & mask) << __devx_dw_bit_off(bit_sz, bit_off))); +} + +#define DEVX_SET(typ, p, fld, v) \ + _devx_set(p, v, __devx_bit_off(typ, fld), __devx_bit_sz(typ, fld)) + +static inline uint32_t _devx_get(const void *p, size_t bit_off, size_t bit_sz) +{ + return ((be32toh(*((const __be32 *)(p) + __devx_dw_off(bit_off))) >> + __devx_dw_bit_off(bit_sz, bit_off)) & + __devx_mask(bit_sz)); +} + +#define DEVX_GET(typ, p, fld) \ + _devx_get(p, __devx_bit_off(typ, fld), __devx_bit_sz(typ, fld)) + +static inline void _devx_set64(void *p, uint64_t v, size_t bit_off) +{ + *((__be64 *)(p) + __devx_64_off(bit_off)) = htobe64(v); +} + +#define DEVX_SET64(typ, p, fld, v) _devx_set64(p, v, __devx_bit_off(typ, fld)) + +static inline uint64_t _devx_get64(const void *p, size_t bit_off) +{ + return be64toh(*((const __be64 *)(p) + __devx_64_off(bit_off))); +} + +#define DEVX_GET64(typ, p, fld) _devx_get64(p, __devx_bit_off(typ, fld)) + +#define DEVX_ARRAY_SET64(typ, p, fld, idx, v) do { \ + DEVX_SET64(typ, p, fld[idx], v); \ +} while (0) + +struct mlx5dv_dr_domain; +struct mlx5dv_dr_table; +struct mlx5dv_dr_matcher; +struct mlx5dv_dr_rule; +struct mlx5dv_dr_action; + +enum mlx5dv_dr_domain_type { + MLX5DV_DR_DOMAIN_TYPE_NIC_RX, + MLX5DV_DR_DOMAIN_TYPE_NIC_TX, + MLX5DV_DR_DOMAIN_TYPE_FDB, +}; + +enum mlx5dv_dr_domain_sync_flags { + MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW = 1 << 0, + MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW = 1 << 1, + MLX5DV_DR_DOMAIN_SYNC_FLAGS_MEM = 1 << 2, +}; + +struct mlx5dv_dr_flow_meter_attr { + struct mlx5dv_dr_table *next_table; + uint8_t active; + uint8_t reg_c_index; + size_t flow_meter_parameter_sz; + void *flow_meter_parameter; +}; + +struct mlx5dv_dr_flow_sampler_attr { + uint32_t sample_ratio; + struct mlx5dv_dr_table *default_next_table; + uint32_t num_sample_actions; + struct mlx5dv_dr_action **sample_actions; + __be64 action; +}; + +struct mlx5dv_dr_domain * +mlx5dv_dr_domain_create(struct ibv_context *ctx, + enum mlx5dv_dr_domain_type type); + +int mlx5dv_dr_domain_destroy(struct mlx5dv_dr_domain *domain); + +int mlx5dv_dr_domain_sync(struct mlx5dv_dr_domain *domain, uint32_t flags); + +void mlx5dv_dr_domain_set_reclaim_device_memory(struct mlx5dv_dr_domain *dmn, + bool enable); + +void mlx5dv_dr_domain_allow_duplicate_rules(struct mlx5dv_dr_domain *domain, + bool allow); + +struct mlx5dv_dr_table * +mlx5dv_dr_table_create(struct mlx5dv_dr_domain *domain, uint32_t level); + +int mlx5dv_dr_table_destroy(struct mlx5dv_dr_table *table); + +struct mlx5dv_dr_matcher * +mlx5dv_dr_matcher_create(struct mlx5dv_dr_table *table, + uint16_t priority, + uint8_t match_criteria_enable, + struct mlx5dv_flow_match_parameters *mask); + +int mlx5dv_dr_matcher_destroy(struct mlx5dv_dr_matcher *matcher); + +enum mlx5dv_dr_matcher_layout_flags { + MLX5DV_DR_MATCHER_LAYOUT_RESIZABLE = 1 << 0, + MLX5DV_DR_MATCHER_LAYOUT_NUM_RULE = 1 << 1, +}; + +struct mlx5dv_dr_matcher_layout { + uint32_t flags; /* use enum mlx5dv_dr_matcher_layout_flags */ + uint32_t log_num_of_rules_hint; +}; + +int mlx5dv_dr_matcher_set_layout(struct mlx5dv_dr_matcher *matcher, + struct mlx5dv_dr_matcher_layout *layout); + +struct mlx5dv_dr_rule * +mlx5dv_dr_rule_create(struct mlx5dv_dr_matcher *matcher, + struct mlx5dv_flow_match_parameters *value, + size_t num_actions, + struct mlx5dv_dr_action *actions[]); + +int mlx5dv_dr_rule_destroy(struct mlx5dv_dr_rule *rule); + +enum mlx5dv_dr_action_flags { + MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL = 1 << 0, +}; + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_ibv_qp(struct ibv_qp *ibqp); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_table(struct mlx5dv_dr_table *table); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_vport(struct mlx5dv_dr_domain *domain, + uint32_t vport); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_ib_port(struct mlx5dv_dr_domain *domain, + uint32_t ib_port); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_devx_tir(struct mlx5dv_devx_obj *devx_obj); + +enum mlx5dv_dr_action_dest_type { + MLX5DV_DR_ACTION_DEST, + MLX5DV_DR_ACTION_DEST_REFORMAT, +}; + +struct mlx5dv_dr_action_dest_reformat { + struct mlx5dv_dr_action *reformat; + struct mlx5dv_dr_action *dest; +}; + +struct mlx5dv_dr_action_dest_attr { + enum mlx5dv_dr_action_dest_type type; + union { + struct mlx5dv_dr_action *dest; + struct mlx5dv_dr_action_dest_reformat *dest_reformat; + }; +}; + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_array(struct mlx5dv_dr_domain *domain, + size_t num_dest, + struct mlx5dv_dr_action_dest_attr *dests[]); + +struct mlx5dv_dr_action *mlx5dv_dr_action_create_drop(void); + +struct mlx5dv_dr_action *mlx5dv_dr_action_create_default_miss(void); + +struct mlx5dv_dr_action *mlx5dv_dr_action_create_tag(uint32_t tag_value); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_flow_counter(struct mlx5dv_devx_obj *devx_obj, + uint32_t offset); + +enum mlx5dv_dr_action_aso_first_hit_flags { + MLX5DV_DR_ACTION_FLAGS_ASO_FIRST_HIT_SET = 1 << 0, +}; + +enum mlx5dv_dr_action_aso_flow_meter_flags { + MLX5DV_DR_ACTION_FLAGS_ASO_FLOW_METER_RED = 1 << 0, + MLX5DV_DR_ACTION_FLAGS_ASO_FLOW_METER_YELLOW = 1 << 1, + MLX5DV_DR_ACTION_FLAGS_ASO_FLOW_METER_GREEN = 1 << 2, + MLX5DV_DR_ACTION_FLAGS_ASO_FLOW_METER_UNDEFINED = 1 << 3, +}; + +enum mlx5dv_dr_action_aso_ct_flags { + MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_INITIATOR = 1 << 0, + MLX5DV_DR_ACTION_FLAGS_ASO_CT_DIRECTION_RESPONDER = 1 << 1, +}; + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_aso(struct mlx5dv_dr_domain *domain, + struct mlx5dv_devx_obj *devx_obj, + uint32_t offset, + uint32_t flags, + uint8_t return_reg_c); + +int mlx5dv_dr_action_modify_aso(struct mlx5dv_dr_action *action, + uint32_t offset, + uint32_t flags, + uint8_t return_reg_c); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_packet_reformat(struct mlx5dv_dr_domain *domain, + uint32_t flags, + enum mlx5dv_flow_action_packet_reformat_type reformat_type, + size_t data_sz, void *data); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_modify_header(struct mlx5dv_dr_domain *domain, + uint32_t flags, + size_t actions_sz, + __be64 actions[]); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_flow_meter(struct mlx5dv_dr_flow_meter_attr *attr); + +int mlx5dv_dr_action_modify_flow_meter(struct mlx5dv_dr_action *action, + struct mlx5dv_dr_flow_meter_attr *attr, + __be64 modify_field_select); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_flow_sampler(struct mlx5dv_dr_flow_sampler_attr *attr); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_pop_vlan(void); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_push_vlan(struct mlx5dv_dr_domain *domain, + __be32 vlan_hdr); + +struct mlx5dv_dr_action * +mlx5dv_dr_action_create_dest_root_table(struct mlx5dv_dr_table *table, + uint16_t priority); + +int mlx5dv_dr_action_destroy(struct mlx5dv_dr_action *action); + +int mlx5dv_dump_dr_domain(FILE *fout, struct mlx5dv_dr_domain *domain); +int mlx5dv_dump_dr_table(FILE *fout, struct mlx5dv_dr_table *table); +int mlx5dv_dump_dr_matcher(FILE *fout, struct mlx5dv_dr_matcher *matcher); +int mlx5dv_dump_dr_rule(FILE *fout, struct mlx5dv_dr_rule *rule); + +struct mlx5dv_pp { + uint16_t index; +}; + +struct mlx5dv_pp *mlx5dv_pp_alloc(struct ibv_context *context, + size_t pp_context_sz, + const void *pp_context, + uint32_t flags); + +void mlx5dv_pp_free(struct mlx5dv_pp *pp); + +int mlx5dv_query_qp_lag_port(struct ibv_qp *qp, + uint8_t *port_num, + uint8_t *active_port_num); + +int mlx5dv_modify_qp_lag_port(struct ibv_qp *qp, uint8_t port_num); + +int mlx5dv_modify_qp_udp_sport(struct ibv_qp *qp, uint16_t udp_sport); + +int mlx5dv_dci_stream_id_reset(struct ibv_qp *qp, uint16_t stream_id); + +enum mlx5dv_sched_elem_attr_flags { + MLX5DV_SCHED_ELEM_ATTR_FLAGS_BW_SHARE = 1 << 0, + MLX5DV_SCHED_ELEM_ATTR_FLAGS_MAX_AVG_BW = 1 << 1, +}; + +struct mlx5dv_sched_attr { + struct mlx5dv_sched_node *parent; + uint32_t flags; /* Use mlx5dv_sched_elem_attr_flags */ + uint32_t bw_share; + uint32_t max_avg_bw; + uint64_t comp_mask; +}; + +struct mlx5dv_sched_node; +struct mlx5dv_sched_leaf; + +struct mlx5dv_sched_node * +mlx5dv_sched_node_create(struct ibv_context *context, + const struct mlx5dv_sched_attr *sched_attr); +struct mlx5dv_sched_leaf * +mlx5dv_sched_leaf_create(struct ibv_context *context, + const struct mlx5dv_sched_attr *sched_attr); + +int mlx5dv_sched_node_modify(struct mlx5dv_sched_node *node, + const struct mlx5dv_sched_attr *sched_attr); + +int mlx5dv_sched_leaf_modify(struct mlx5dv_sched_leaf *leaf, + const struct mlx5dv_sched_attr *sched_attr); + +int mlx5dv_sched_node_destroy(struct mlx5dv_sched_node *node); + +int mlx5dv_sched_leaf_destroy(struct mlx5dv_sched_leaf *leaf); + +int mlx5dv_modify_qp_sched_elem(struct ibv_qp *qp, + const struct mlx5dv_sched_leaf *requestor, + const struct mlx5dv_sched_leaf *responder); + +int mlx5dv_reserved_qpn_alloc(struct ibv_context *ctx, uint32_t *qpn); +int mlx5dv_reserved_qpn_dealloc(struct ibv_context *ctx, uint32_t qpn); + +int mlx5dv_dr_aso_other_domain_link(struct mlx5dv_devx_obj *devx_obj, + struct mlx5dv_dr_domain *peer_dmn, + struct mlx5dv_dr_domain *dmn, + uint32_t flags, + uint8_t return_reg_c); +int mlx5dv_dr_aso_other_domain_unlink(struct mlx5dv_devx_obj *devx_obj, + struct mlx5dv_dr_domain *dmn); + +struct mlx5dv_devx_msi_vector { + int vector; + int fd; +}; + +struct mlx5dv_devx_msi_vector * +mlx5dv_devx_alloc_msi_vector(struct ibv_context *ibctx); + +int mlx5dv_devx_free_msi_vector(struct mlx5dv_devx_msi_vector *msi); + +struct mlx5dv_devx_eq { + void *vaddr; +}; + +struct mlx5dv_devx_eq * +mlx5dv_devx_create_eq(struct ibv_context *ibctx, const void *in, size_t inlen, + void *out, size_t outlen); + +int mlx5dv_devx_destroy_eq(struct mlx5dv_devx_eq *eq); + +#ifdef __cplusplus +} +#endif + +#endif /* _MLX5DV_H_ */ diff --git a/src/gda/mlx5/provider_gda_mlx5.hpp b/src/gda/mlx5/provider_gda_mlx5.hpp new file mode 100644 index 0000000000..6c43f59c67 --- /dev/null +++ b/src/gda/mlx5/provider_gda_mlx5.hpp @@ -0,0 +1,37 @@ +/****************************************************************************** + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_ +#define LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_ + +extern "C" { +#include "gda/mlx5/mlx5dv.h" +} + +typedef union db_reg { + uint64_t *ptr; + uintptr_t uint; +} db_reg_t; + +#endif //LIBRARY_SRC_GDA_MLX5_GDA_PROVIDER_HPP_ diff --git a/src/gda/queue_pair.cpp b/src/gda/queue_pair.cpp index e4ec342854..9b10d22b61 100644 --- a/src/gda/queue_pair.cpp +++ b/src/gda/queue_pair.cpp @@ -28,15 +28,13 @@ #include "backend_gda.hpp" #include "endian.hpp" -#if defined(GDA_MLX5) #include "segment_builder.hpp" -#endif #include "util.hpp" #include "constants.hpp" namespace rocshmem { -QueuePair::QueuePair(struct ibv_pd* pd) { +QueuePair::QueuePair(struct ibv_pd* pd, int gda_vendor) { int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ @@ -56,17 +54,35 @@ QueuePair::QueuePair(struct ibv_pd* pd) { mr_fetching_atomic = ibv_reg_mr(pd, fetching_atomic, 8 * FETCHING_ATOMIC_CNT, access); CHECK_NNULL(mr_fetching_atomic, "ibv_reg_mr"); -#if defined(GDA_MLX5) - nonfetching_atomic_lkey = htobe32(mr_nonfetching_atomic->lkey); - fetching_atomic_lkey = htobe32(mr_fetching_atomic->lkey); -#else - nonfetching_atomic_lkey = mr_nonfetching_atomic->lkey; - fetching_atomic_lkey = mr_fetching_atomic->lkey; -#endif + if (gda_vendor == GDAVendor::MLX5) { + nonfetching_atomic_lkey = htobe32(mr_nonfetching_atomic->lkey); + fetching_atomic_lkey = htobe32(mr_fetching_atomic->lkey); + } else { + nonfetching_atomic_lkey = mr_nonfetching_atomic->lkey; + fetching_atomic_lkey = mr_fetching_atomic->lkey; + } for(int i{0}; i < FETCHING_ATOMIC_CNT; i+=WF_SIZE) { fetching_atomic_freelist->push_back(fetching_atomic + i); } + + /* Set Correct opcodes for each NIC */ +#ifdef GDA_IONIC + gda_op_rdma_write = IONIC_V2_OP_RDMA_WRITE; + gda_op_atomic_fa = IONIC_V2_OP_ATOMIC_FA; + gda_op_atomic_cs = IONIC_V2_OP_ATOMIC_CS; +#endif + if (gda_vendor == GDAVendor::BNXT) { + gda_op_rdma_write = BNXT_RE_WR_OPCD_RDMA_WRITE; + gda_op_rdma_read = BNXT_RE_WR_OPCD_RDMA_READ; + gda_op_atomic_fa = BNXT_RE_WR_OPCD_ATOMIC_FA; + gda_op_atomic_cs = BNXT_RE_WR_OPCD_ATOMIC_CS; + } else if (gda_vendor == GDAVendor::MLX5) { + gda_op_rdma_write = MLX5_OPCODE_RDMA_WRITE; + gda_op_rdma_read = MLX5_OPCODE_RDMA_READ; + gda_op_atomic_fa = MLX5_OPCODE_ATOMIC_FA; + gda_op_atomic_cs = MLX5_OPCODE_ATOMIC_CS; + } } QueuePair::~QueuePair() { @@ -447,7 +463,7 @@ __device__ void QueuePair::post_wqe_rma(int pe, int32_t size, uintptr_t *laddr, seg_build.update_ctrl_seg(my_sq_counter, opcode, 0, qp_num, MLX5_WQE_CTRL_CQ_UPDATE, 3, 0, 0); seg_build.update_raddr_seg(raddr, rkey); - if (size <= inline_threshold && opcode == GDA_OP_RDMA_WRITE) { + if (size <= inline_threshold && opcode == gda_op_rdma_write) { seg_build.update_inl_data_seg(laddr, size); } else { seg_build.update_data_seg(laddr, size, lkey); @@ -629,23 +645,33 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t size, uintptr_t *rad __device__ void QueuePair::put_nbi(void *dest, const void *source, size_t nelems, int pe) { uintptr_t *src = reinterpret_cast(const_cast(source)); uintptr_t *dst = reinterpret_cast(dest); - post_wqe_rma(pe, nelems, src, dst, GDA_OP_RDMA_WRITE); + post_wqe_rma(pe, nelems, src, dst, gda_op_rdma_write); } __device__ void QueuePair::get_nbi(void *dest, const void *source, size_t nelems, int pe) { uintptr_t *src = reinterpret_cast(const_cast(source)); uintptr_t *dst = reinterpret_cast(dest); - post_wqe_rma(pe, nelems, dst, src, GDA_OP_RDMA_READ); + post_wqe_rma(pe, nelems, dst, src, gda_op_rdma_read); } -__device__ int64_t QueuePair::atomic_fetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe, uint8_t atomic_op) { +__device__ int64_t QueuePair::atomic_cas(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) { uintptr_t *dst = reinterpret_cast(dest); - return post_wqe_amo(pe, sizeof(int64_t), dst, atomic_op, atomic_data, atomic_cmp, true); + return post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_cs, atomic_data, atomic_cmp, true); } -__device__ void QueuePair::atomic_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe, uint8_t atomic_op) { +__device__ int64_t QueuePair::atomic_cas_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) { uintptr_t *dst = reinterpret_cast(dest); - post_wqe_amo(pe, sizeof(int64_t), dst, atomic_op, atomic_data, atomic_cmp, false); + return post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_cs, atomic_data, atomic_cmp, false); +} + +__device__ int64_t QueuePair::atomic_fetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) { + uintptr_t *dst = reinterpret_cast(dest); + return post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_fa, atomic_data, atomic_cmp, true); +} + +__device__ void QueuePair::atomic_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe) { + uintptr_t *dst = reinterpret_cast(dest); + post_wqe_amo(pe, sizeof(int64_t), dst, gda_op_atomic_fa, atomic_data, atomic_cmp, false); } } // namespace rocshmem diff --git a/src/gda/queue_pair.hpp b/src/gda/queue_pair.hpp index 3d1801f254..da9ccd33e4 100644 --- a/src/gda/queue_pair.hpp +++ b/src/gda/queue_pair.hpp @@ -37,48 +37,18 @@ #include "rocshmem_config.h" #include "endian.h" #include "constants.hpp" -#ifdef GDA_IONIC -extern "C" { -#include -#include -} -#elif defined(GDA_BNXT) -#include "bnxt/provider_gda_bnxt.hpp" -#elif defined(GDA_MLX5) -#include -#else -#error "Please select an RDMA provider" -#endif + +#include "gda/ionic/provider_gda_ionic.hpp" +#include "gda/mlx5/provider_gda_mlx5.hpp" +#include "gda/bnxt/provider_gda_bnxt.hpp" #include "containers/free_list.hpp" #include "memory/hip_allocator.hpp" -#ifdef GDA_IONIC -#define GDA_MAX_ATOMIC 15 -#define GDA_OP_RDMA_WRITE IONIC_V2_OP_RDMA_WRITE -#define GDA_OP_ATOMIC_FA IONIC_V2_OP_ATOMIC_FA -#define GDA_OP_ATOMIC_CS IONIC_V2_OP_ATOMIC_CS -#elif defined(GDA_MLX5) -#define GDA_MAX_ATOMIC 1 -#define GDA_OP_RDMA_WRITE MLX5_OPCODE_RDMA_WRITE -#define GDA_OP_RDMA_READ MLX5_OPCODE_RDMA_READ -#define GDA_OP_ATOMIC_FA MLX5_OPCODE_ATOMIC_FA -#define GDA_OP_ATOMIC_CS MLX5_OPCODE_ATOMIC_CS -#endif - namespace rocshmem { class GDABackend; -typedef union db_reg { - uint64_t *ptr; - uintptr_t uint; -} db_reg_t; - -#define SPIN_LOCK_INVALID 0xdead -#define SPIN_LOCK_UNLOCKED 0x1234 -#define SPIN_LOCK_LOCKED 0xabcd - class QueuePair { public: friend GDABackend; @@ -86,7 +56,7 @@ class QueuePair { /** * @brief Constructor. */ - explicit QueuePair(struct ibv_pd* pd); + explicit QueuePair(struct ibv_pd* pd, int gda_vendor); /** * @brief Destructor. @@ -125,11 +95,10 @@ class QueuePair { * @param[in] value Data value for the atomic operation. * @param[in] cond Used in atomic comparisons. * @param[in] pe Destination processing element of data transmission. - * @param[in] atomic_op The atomic operation to perform. * * @return An atomic value */ - __device__ int64_t atomic_fetch(void *dest, int64_t value, int64_t cond, int pe, uint8_t atomic_op); + __device__ int64_t atomic_fetch(void *dest, int64_t value, int64_t cond, int pe); /** * @brief Create and enqueue an atomic fetch work queue entry (wqe). @@ -138,9 +107,30 @@ class QueuePair { * @param[in] value Data value for the atomic operation. * @param[in] cond Used in atomic comparisons. * @param[in] pe Destination processing element of data transmission. - * @param[in] atomic_op The atomic operation to perform. */ - __device__ void atomic_nofetch(void *dest, int64_t value, int64_t cond, int pe, uint8_t atomic_op); + __device__ void atomic_nofetch(void *dest, int64_t value, int64_t cond, int pe); + + /** + * @brief Create and enqueue an atomic cas work queue entry (wqe). + * + * @param[in] dest Destination address for data transmission. + * @param[in] value Data value for the atomic operation. + * @param[in] cond Used in atomic comparisons. + * @param[in] pe Destination processing element of data transmission. + * + * @return An atomic value + */ + __device__ int64_t atomic_cas(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe); + + /** + * @brief Create and enqueue an atomic cas work queue entry (wqe). + * + * @param[in] dest Destination address for data transmission. + * @param[in] value Data value for the atomic operation. + * @param[in] cond Used in atomic comparisons. + * @param[in] pe Destination processing element of data transmission. + */ + __device__ int64_t atomic_cas_nofetch(void *dest, int64_t atomic_data, int64_t atomic_cmp, int pe); char *const *base_heap{nullptr}; @@ -233,14 +223,18 @@ class QueuePair { uint32_t sq_dbprod{0}; uint32_t sq_prod{0}; uint32_t sq_msn{0}; +#endif -#elif defined(GDA_BNXT) + /* GDAVendor::BNXT START */ uint64_t *dbr; struct bnxt_device_cq cq; struct bnxt_device_sq sq; __device__ int poll_cq(); -#else // GDA_MLX5 + + /* GDAVendor::BNXT END */ + + /* GDAVendor::MLX5 START */ db_reg_t db{}; @@ -301,7 +295,7 @@ class QueuePair { static constexpr size_t OUTSTANDING_TABLE_SIZE = 65536; uint64_t outstanding_wqes[OUTSTANDING_TABLE_SIZE]{0}; -#endif // GDA_IONIC + /* GDAVendor::MLX5 END */ uint32_t inline_threshold{0}; @@ -325,6 +319,10 @@ class QueuePair { HIPAllocator allocator{}; + uint8_t gda_op_rdma_write; + uint8_t gda_op_rdma_read; + uint8_t gda_op_atomic_fa; + uint8_t gda_op_atomic_cs; }; } // namespace rocshmem diff --git a/src/gda/segment_builder.hpp b/src/gda/segment_builder.hpp index c5ce93aca6..51861cd0f2 100644 --- a/src/gda/segment_builder.hpp +++ b/src/gda/segment_builder.hpp @@ -25,7 +25,7 @@ #ifndef LIBRARY_SRC_GDA_SEGMENT_BUILDER_HPP_ #define LIBRARY_SRC_GDA_SEGMENT_BUILDER_HPP_ -#include +#include "gda/mlx5/provider_gda_mlx5.hpp" #include "util.hpp"