diff --git a/projects/rocshmem/docs/compile_and_run.rst b/projects/rocshmem/docs/compile_and_run.rst index 767c923283..5e72899da6 100644 --- a/projects/rocshmem/docs/compile_and_run.rst +++ b/projects/rocshmem/docs/compile_and_run.rst @@ -90,3 +90,6 @@ You can control the behavior of rocSHMEM by using the following environment vari * - ROCSHMEM_GDA_ALTERNATE_QP_PORTS - 1 - Enables/Disables having QPs alternate their mappings across rocSHMEM contexts. This helps saturate bandwidth on multiport bonded interfaces. + * - ROCSHMEM_GDA_PCIE_RELAXED_ORDERING + - 0 + - Enables PCIe Relaxed Ordering when registering the symmetric heap with the RDMA NICs. diff --git a/projects/rocshmem/src/envvar.cpp b/projects/rocshmem/src/envvar.cpp index ea20236bd7..c9c5b9cc3c 100644 --- a/projects/rocshmem/src/envvar.cpp +++ b/projects/rocshmem/src/envvar.cpp @@ -68,6 +68,7 @@ namespace envvar { const var provider("PROVIDER", ""); const var alternate_qp_ports("ALTERNATE_QP_PORTS", "", true); const var traffic_class("TRAFFIC_CLASS", "", 0); + const var pcie_relaxed_ordering("PCIE_RELAXED_ORDERING", "", false); } // namespace gda namespace _detail { diff --git a/projects/rocshmem/src/envvar.hpp b/projects/rocshmem/src/envvar.hpp index 4838f315ca..5c1b9ac90f 100644 --- a/projects/rocshmem/src/envvar.hpp +++ b/projects/rocshmem/src/envvar.hpp @@ -451,6 +451,7 @@ namespace envvar { extern const var provider; extern const var alternate_qp_ports; extern const var traffic_class; + extern const var pcie_relaxed_ordering; } // namespace gda } // namespace envvar } // namespace rocshmem diff --git a/projects/rocshmem/src/gda/backend_gda.cpp b/projects/rocshmem/src/gda/backend_gda.cpp index 841c7c6c14..2a6d7a7cf3 100644 --- a/projects/rocshmem/src/gda/backend_gda.cpp +++ b/projects/rocshmem/src/gda/backend_gda.cpp @@ -756,6 +756,9 @@ void GDABackend::setup_heap_memory_rkey() { auto *base_heap = heap.get_local_heap_base(); int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | 
IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_ATOMIC; + if (envvar::gda::pcie_relaxed_ordering) { + access |= IBV_ACCESS_RELAXED_ORDERING; + } heap_mr = ibv.reg_mr(pd_orig, base_heap, heap.get_size(), access); CHECK_NNULL(heap_mr, "ibv_reg_mr"); diff --git a/projects/rocshmem/src/gda/queue_pair.cpp b/projects/rocshmem/src/gda/queue_pair.cpp index 07e3ee3e7b..2499d6cc22 100644 --- a/projects/rocshmem/src/gda/queue_pair.cpp +++ b/projects/rocshmem/src/gda/queue_pair.cpp @@ -38,6 +38,9 @@ QueuePair::QueuePair(struct ibv_pd* pd, int gda_provider) { | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_ATOMIC; + if (envvar::gda::pcie_relaxed_ordering) { + access |= IBV_ACCESS_RELAXED_ORDERING; + } allocator.allocate((void**)&nonfetching_atomic, 8); allocator.allocate((void**)&fetching_atomic, 8 * FETCHING_ATOMIC_CNT); allocator.allocate((void**)&fetching_atomic_freelist, sizeof(FreeListT*));