Improve qp mapping (#259)
Co-authored-by: Aurelien Bouteiller <aurelien.bouteiller@amd.com>
Этот коммит содержится в:
+6
-2
@@ -125,10 +125,14 @@ rocSHMEM has the following environment variables:
|
||||
Disables IPC support for the reverse offload backend.
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS (default : 1024)
|
||||
Maximum number of contexts used in library
|
||||
Maximum number of contexts used in library.
|
||||
|
||||
ROCSHMEM_MAX_NUM_TEAMS (default : 40)
|
||||
Maximum number of teams supported by the library
|
||||
Maximum number of teams supported by the library.
|
||||
|
||||
ROCSHMEM_GDA_ALTERNATE_QP_PORTS (default : 1)
|
||||
Enables/Disables having QPs alternate their mappings
|
||||
across rocSHMEM contexts.
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -15,10 +15,10 @@ Compiling and linking with rocSHMEM
|
||||
|
||||
rocSHMEM is a library that can be statically linked to your application during compilation with ``hipcc``. For more information, see :doc:`HIPCC <hipcc:index>`.
|
||||
|
||||
When compiling your application with ``hipcc``, you must include the rocSHMEM header files and the rocSHMEM library.
|
||||
When compiling your application with ``hipcc``, you must include the rocSHMEM header files and the rocSHMEM library.
|
||||
Because rocSHMEM depends on MPI (Message Passing Interface), you must manually add the arguments for MPI linkage instead of using ``mpicc``.
|
||||
|
||||
When using ``hipcc`` directly without a build system, it's recommended to perform the compilation and linking steps separately.
|
||||
When using ``hipcc`` directly without a build system, it's recommended to perform the compilation and linking steps separately.
|
||||
|
||||
Example compile and link commands are provided at the top of the example files in the ``examples`` directory:
|
||||
|
||||
@@ -36,13 +36,13 @@ Example compile and link commands are provided at the top of the example files i
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
If your project uses CMake, see
|
||||
If your project uses CMake, see
|
||||
`Using CMake with AMD ROCm <https://rocmdocs.amd.com/en/latest/conceptual/cmake-packages.html>`_.
|
||||
|
||||
Running a rocSHMEM application
|
||||
------------------------------
|
||||
|
||||
Applications using rocSHMEM typically deploy multiple processes, usually one per GPU.
|
||||
Applications using rocSHMEM typically deploy multiple processes, usually one per GPU.
|
||||
The MPI launcher, for example, ``mpiexec`` with Open MPI, is used to start the required number
|
||||
of processes. For example, to launch two ``getmem`` example processes (available when compiled from source):
|
||||
|
||||
@@ -87,3 +87,6 @@ You can control the behavior of rocSHMEM by using the following environment vari
|
||||
* - ROCSHMEM_RO_DISABLE_IPC
|
||||
- 0
|
||||
- Defines whether to force using the RO conduit even when IPC is available.
|
||||
* - ROCSHMEM_GDA_ALTERNATE_QP_PORTS
|
||||
- 1
|
||||
- Enables/Disables having QPs alternate their mappings across rocSHMEM contexts. This helps saturate bandwidth on multiport bonded interfaces.
|
||||
|
||||
@@ -144,6 +144,10 @@ void GDABackend::read_env() {
|
||||
if ((value = getenv("ROCSHMEM_SQ_SIZE"))) {
|
||||
sq_size = atoi(value);
|
||||
}
|
||||
|
||||
if ((value = getenv("ROCSHMEM_GDA_ALTERNATE_QP_PORTS"))) {
|
||||
alternate_qp_ports_enabled = atoi(value);
|
||||
}
|
||||
}
|
||||
|
||||
void GDABackend::setup_ipc() {
|
||||
@@ -938,6 +942,57 @@ void GDABackend::create_queues() {
|
||||
create_cqs(ncqes);
|
||||
create_qps(sq_size);
|
||||
}
|
||||
|
||||
alternate_qp_ports();
|
||||
}
|
||||
|
||||
void GDABackend::alternate_qp_ports() {
|
||||
int cur_qp_idx;
|
||||
int new_qp_idx;
|
||||
|
||||
/* We can't remap anything */
|
||||
if (maximum_num_contexts_ == 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (alternate_qp_ports_enabled) {
|
||||
/* If we assume two PEs and a default context and two user context,
|
||||
* initially QPs are in the following port order:
|
||||
*
|
||||
* Labels :| DCTX PE0 | DCTX PE1 | CTX0 PE0 | CTX0 PE1 | CTX1 PE0 | CTX1 PE1 |
|
||||
* QPs :| QP0 | QP1 | QP2 | QP3 | QP4 | QP5 |
|
||||
* Port :| 0 | 1 | 0 | 1 | 0 | 1 |
|
||||
*
|
||||
* This creates the pattern where PE1 is always mapped to port 0 but we want it
|
||||
* to use both ports to maximize throughput/bandwidth.
|
||||
*
|
||||
* So we reorder our QPs
|
||||
*
|
||||
* Labels :| DCTX PE0 | DCTX PE1 | CTX0 PE0 | CTX0 PE1 | CTX1 PE0 | CTX1 PE1 |
|
||||
* QPs :| QP0 | QP1 | QP2 | QP4 | QP3 | QP5 |
|
||||
* Port :| 0 | 1 | 1 | 0 | 0 | 1 |
|
||||
*
|
||||
* We alternate the ports [0,1] and [1,0] for each context.
|
||||
* Therefore, when we use two contexts we use both ports
|
||||
*
|
||||
*/
|
||||
|
||||
/* Re-Map each context */
|
||||
for (int i = 1; i < (maximum_num_contexts_ + 1); i+=2) {
|
||||
for (int p = 0; p < num_pes; p+=2) {
|
||||
cur_qp_idx = (i * num_pes) + p;
|
||||
new_qp_idx = cur_qp_idx + 1;
|
||||
|
||||
if (new_qp_idx < qps.size()) {
|
||||
// Swap QPs
|
||||
std::swap(cqs[cur_qp_idx], cqs[new_qp_idx]);
|
||||
std::swap(qps[cur_qp_idx], qps[new_qp_idx]);
|
||||
std::swap(bnxt_cqs[cur_qp_idx], bnxt_cqs[new_qp_idx]);
|
||||
std::swap(bnxt_qps[cur_qp_idx], bnxt_qps[new_qp_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* GDABackend::pd_alloc_device_uncached(struct ibv_pd* pd, void* pd_context, size_t size, size_t alignment, uint64_t resource_type) {
|
||||
|
||||
@@ -127,6 +127,7 @@ class GDABackend : public Backend {
|
||||
std::vector<ibv_qp*> qps;
|
||||
std::vector<ibv_cq*> cqs;
|
||||
std::vector<dest_info_t> dest_info;
|
||||
/* Non-zero enables the QP remapping done by alternate_qp_ports(); read_env()
 * overrides it from ROCSHMEM_GDA_ALTERNATE_QP_PORTS when that is set. */
int alternate_qp_ports_enabled = 1;
|
||||
|
||||
/* GDA_BNXT START */
|
||||
std::vector<struct bnxt_host_qp> bnxt_qps;
|
||||
@@ -376,6 +377,11 @@ class GDABackend : public Backend {
|
||||
void create_qps(int sq_length);
|
||||
void bnxt_create_qps(int sq_length);
|
||||
|
||||
/**
|
||||
* @brief Reorders QPs so that we map rocSHMEM contexts to the correct QPs
|
||||
*/
|
||||
void alternate_qp_ports();
|
||||
|
||||
/**
|
||||
* @brief Exchange QP information for connection
|
||||
*/
|
||||
|
||||
Ссылка в новой задаче
Block a user