From 7aecbdec4d9638480ad59e8fefdc0ced415166a0 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Tue, 25 Mar 2025 07:50:15 -0500 Subject: [PATCH] update README documentation for RO (#63) * README: update documentation for RO support update the README and the install_dependencies script to match the requirements of the RO conduit. * add CODEOWNERS file [ROCm/rocshmem commit: 4e48c9748e9a3a18a235f19aa15795071bb9f217] --- projects/rocshmem/.github/CODEOWNERS | 1 + projects/rocshmem/README.md | 39 ++++++++++++++----- .../rocshmem/scripts/install_dependencies.sh | 2 - 3 files changed, 31 insertions(+), 11 deletions(-) create mode 100755 projects/rocshmem/.github/CODEOWNERS diff --git a/projects/rocshmem/.github/CODEOWNERS b/projects/rocshmem/.github/CODEOWNERS new file mode 100755 index 0000000000..d59fc4c7ca --- /dev/null +++ b/projects/rocshmem/.github/CODEOWNERS @@ -0,0 +1 @@ +@avinashkethineedi @Yiltan @BKP @abouteiller @edgargabriel @gaoikawa \ No newline at end of file diff --git a/projects/rocshmem/README.md b/projects/rocshmem/README.md index 5e80cd9a16..2a615af785 100644 --- a/projects/rocshmem/README.md +++ b/projects/rocshmem/README.md @@ -10,9 +10,6 @@ rocSHMEM uses a single symmetric heap (SHEAP) that is allocated on GPU memories. There are currently two backends for rocSHMEM; IPC and Reverse Offload (RO). The backends primarily differ in their implementations of intra-kernel networking. -Currently, only the IPC backend is supported. -The RO backend is provided as-is with -no guarantees of support from AMD or AMD Research. The IPC backend implements communication primitives using load/store operations issued from the GPU. @@ -21,6 +18,8 @@ to the host-side runtime, which calls into a traditional MPI or OpenSHMEM implementation. This forwarding of requests is transparent to the programmer, who only sees the GPU-side interface. +The RO backend is provided as-is with limited support from AMD or AMD Research. + ## Requirements rocSHMEM base requirements: @@ -40,7 +39,7 @@ OpenCL. rocSHMEM uses the CMake build system. The CMakeLists file contains additional details about library options. -To create an out-of-source build for the IPC backend: +To create an out-of-source build for the IPC backend for single-node use-cases: ``` mkdir build @@ -48,6 +47,14 @@ cd build ../scripts/build_configs/ipc_single ``` +To create an out-of-source build for the RO backend for multi-node use-cases that can also utilize the IPC mechanisms for certain intra-node operations: + +``` +mkdir build +cd build +../scripts/build_configs/ro_ipc +``` + The build script passes configuration options to CMake to setup canonical builds. There are other scripts in `./scripts/build_configs` directory but currently, only `ipc_single` is supported. @@ -111,6 +118,12 @@ rocSHMEM has the following enviroment variables: ROCSHMEM_RO_DISABLE_IPC (default : 0) Disables IPC support for the reverse offload backend. + + ROCSHMEM_MAX_NUM_CONTEXTS (default : 1024) + Maximum number of contexts used in library + + ROCSHMEM_MAX_NUM_TEAMS (default : 40) + Maximum number of teams supported by the library ``` ## Examples @@ -124,7 +137,7 @@ or to look at the provided example code in the `./example/` directory. The examples can be run like so: ``` -mpirun -np 2 ./build/examples/rocshmem_getmem_test +mpirun --map-by numa --mca pml ucx --mca osc ucx -np 2 ./build/examples/rocshmem_getmem_test ``` ## Tests @@ -135,7 +148,7 @@ To run the tests, you may use the driver scripts provided in the `./scripts/` di ``` # Run Functional Tests -./scripts/functional_tests/driver.sh ./build/tests/functional_tests/rocshmem_example_driver short +./scripts/functional_tests/driver.sh ./build/tests/functional_tests/rocshmem_example_driver all # Run Unit Tests ./scripts/unit_tests/driver.sh ./build/tests/unit_tests/rocshmem_unit_tests all @@ -153,7 +166,7 @@ To build and configure ROCm-Aware UCX (1.17.0 or later), you need to: git clone https://github.com/openucx/ucx.git -b v1.17.x cd ucx ./autogen.sh -./configure --prefix= --with-rocm= --enable-mt +./configure --prefix= --with-rocm= --enable-mt make -j 8 make -j 8 install ``` @@ -164,13 +177,21 @@ Then, you need to build Open MPI (5.0.6 or later) with UCX support. git clone --recursive https://github.com/open-mpi/ompi.git -b v5.0.x cd ompi ./autogen.pl -./configure --prefix= --with-rocm= --with-ucx= +./configure --prefix= --with-rocm= --with-ucx= make -j 8 make -j 8 install ``` +After compiling and installing UCX and Open MPI, please update your PATH and LD_LIBRARY_PATH to point to the installation locations, e.g. + +``` +export PATH=/bin:$PATH +export LD_LIBRARY_PATH=/lib:/lib:$LD_LIBRARY_PATH +``` + + Alternatively, we have script to install dependencies. -However, it is not gauranteed to work and perform optimally on all platforms. +However, it is not guaranteed to work and perform optimally on all platforms. Configuration options are platform dependent. ``` diff --git a/projects/rocshmem/scripts/install_dependencies.sh b/projects/rocshmem/scripts/install_dependencies.sh index 56e76c307a..b7c2254cb6 100755 --- a/projects/rocshmem/scripts/install_dependencies.sh +++ b/projects/rocshmem/scripts/install_dependencies.sh @@ -35,7 +35,6 @@ git checkout $_UCX_COMMIT_HASH --without-go \ --without-java \ --without-cuda \ - --without-verbs \ --without-knem make -j make install @@ -56,7 +55,6 @@ git submodule update --init --recursive --with-hwloc=internal \ --with-libevent=internal \ --without-cuda \ - --disable-sphinx \ --disable-mpi-fortran \ --without-ofi make -j