Enable RO CI (#65)

此提交包含在:
Yiltan
2025-04-08 16:12:22 -04:00
提交者 GitHub
父節點 dc61bca066
當前提交 25e7109b64
共有 2 個檔案被更改,包括 48 行新增和 28 行删除
+46 -26
查看文件
@@ -104,6 +104,8 @@ ExecTest() {
NUM_THREADS=$4
MAX_MSG_SIZE=$5
TIMEOUT=$((5 * 60)) # Timeout in seconds
TEST_NUM=${TEST_NUMBERS[$TEST_NAME]}
if [[ "" == "$TEST_NUM" ]]
@@ -120,7 +122,10 @@ ExecTest() {
# MPI Parameters
LAUNCHER=mpirun
OPTIONS=" -n $NUM_RANKS -mca pml ucx -x ROCSHMEM_MAX_NUM_CONTEXTS=$ROCSHMEM_MAX_NUM_CONTEXTS"
OPTIONS=" -n $NUM_RANKS -mca pml ucx -mca osc ucx"
OPTIONS+=" -x ROCSHMEM_MAX_NUM_CONTEXTS=$ROCSHMEM_MAX_NUM_CONTEXTS"
OPTIONS+=" -x UCX_ROCM_IPC_SIGPOOL_MAX_ELEMS=16384"
OPTIONS+=" --map-by numa --timeout $TIMEOUT"
if [[ "" != "$HOSTFILE" ]]
then
@@ -153,7 +158,7 @@ ExecTest() {
unset ROCSHMEM_MAX_NUM_CONTEXTS
}
TestRMA() {
TestRMAPut() {
##############################################################################
# | Name | Ranks | Workgroups | Threads | Max Message Size #
##############################################################################
@@ -176,30 +181,6 @@ TestRMA() {
ExecTest "teamctxput" 2 4 128 1024
ExecTest "teamctxput" 2 16 256 1024
ExecTest "get" 2 1 1 1048576
ExecTest "get" 2 1 1024 512
ExecTest "get" 2 8 1 1048576
ExecTest "get" 2 16 128 8
ExecTest "get" 2 32 256 512
ExecTest "get" 2 64 1024 8
ExecTest "wgget" 2 1 64 1048576
ExecTest "wgget" 2 2 64 1048576
ExecTest "wgget" 2 16 64 8
ExecTest "waveget" 2 1 64 1048576
ExecTest "waveget" 2 2 64 1048576
ExecTest "waveget" 2 2 128 1048576
ExecTest "waveget" 2 16 128 8
ExecTest "teamctxget" 2 4 128 1024
ExecTest "teamctxget" 2 16 256 1024
ExecTest "g" 2 1 1 128
ExecTest "g" 2 1 1024 2
ExecTest "g" 2 8 1 32
ExecTest "g" 2 16 128 4
ExecTest "p" 2 1 1 128
ExecTest "p" 2 1 1024 2
ExecTest "p" 2 8 1 32
@@ -225,6 +206,37 @@ TestRMA() {
ExecTest "teamctxputnbi" 2 4 128 1024
ExecTest "teamctxputnbi" 2 16 256 1024
}
TestRMAGet() {
##############################################################################
# | Name | Ranks | Workgroups | Threads | Max Message Size #
##############################################################################
ExecTest "get" 2 1 1 1048576
ExecTest "get" 2 1 1024 512
ExecTest "get" 2 8 1 1048576
ExecTest "get" 2 16 128 8
ExecTest "get" 2 32 256 512
ExecTest "get" 2 64 1024 8
ExecTest "wgget" 2 1 64 1048576
ExecTest "wgget" 2 2 64 1048576
ExecTest "wgget" 2 16 64 8
ExecTest "waveget" 2 1 64 1048576
ExecTest "waveget" 2 2 64 1048576
ExecTest "waveget" 2 2 128 1048576
ExecTest "waveget" 2 16 128 8
ExecTest "teamctxget" 2 4 128 1024
ExecTest "teamctxget" 2 16 256 1024
ExecTest "g" 2 1 1 128
ExecTest "g" 2 1 1024 1
ExecTest "g" 2 8 1 32
ExecTest "g" 2 16 128 4
################################ Non-Blocking ################################
ExecTest "getnbi" 2 1 1 1048576
ExecTest "getnbi" 2 1 1024 512
@@ -246,6 +258,13 @@ TestRMA() {
ExecTest "teamctxgetnbi" 2 16 256 1024
}
#######################################
# Runs the RMA test groups: the put group unconditionally, the get group
# only when it has been explicitly enabled.
# Globals:
#   ROCSHMEM_DRIVER_DISABLE_GET (read) - the get group runs only when this
#                                        is exactly "0"
# Returns:
#   0 when the get group is skipped, otherwise the status of TestRMAGet.
#######################################
TestRMA() {
  TestRMAPut

  # Anything other than a literal "0" (including unset or empty) skips the
  # get group.
  if [[ "$ROCSHMEM_DRIVER_DISABLE_GET" != "0" ]]; then
    return 0
  fi

  TestRMAGet
}
TestAMO() {
##############################################################################
# | Name | Ranks | Workgroups | Threads | Max Message Size #
@@ -429,6 +448,7 @@ LOG_DIR=$3
# Fourth positional argument. ExecTest checks whether it is non-empty before
# using it.
HOSTFILE=$4
# Starts at 0 (success).
# NOTE(review): presumably updated when a test fails; that code is not
# visible here - confirm.
DRIVER_RETURN_STATUS=0
# Keep a value supplied through the environment; fall back to 1 when the
# variable is unset or empty. TestRMA runs TestRMAGet only when this is
# exactly "0", so the get tests are skipped by default.
ROCSHMEM_DRIVER_DISABLE_GET="${ROCSHMEM_DRIVER_DISABLE_GET:-1}"
# Pass the number of command-line arguments to the validation helper
# (defined elsewhere in the script).
ValidateInput $#
# Pass the log directory taken from the third positional argument to its
# validation helper (defined elsewhere in the script).
ValidateLogDir $LOG_DIR
+2 -2
查看文件
@@ -16,11 +16,11 @@ mkdir -p $_DEPS_SRC_DIR
#Adjust branches and installation location as necessary
export _UCX_INSTALL_DIR=$_INSTALL_DIR/ucx
export _UCX_REPO=https://github.com/ROCm/ucx.git
export _UCX_COMMIT_HASH=4ef9a097c12ee6f7a8d3e41c317ea2d47e424b32
export _UCX_COMMIT_HASH=18770fdc1c3b5de202d14a088a14b734d2c4bbf3
export _OMPI_INSTALL_DIR=$_INSTALL_DIR/ompi
export _OMPI_REPO=https://github.com/ROCm/ompi.git
export _OMPI_COMMIT_HASH=8a5c2ef25dc8e4528f0d3fd2ec91a6578160af95
export _OMPI_COMMIT_HASH=720f556508ad3f2cbb17341eb184c2d8565a5133
# Step 1: Build UCX with ROCm support
cd $_DEPS_SRC_DIR