0c2c61d2f1
* Added python test runner to execute rccl tests
* Disabled capture output to avoid hangs
* Add RCCL_TEST_MPI_HOSTFILE env var to get the hostfile
* Converted test_type to boolean gtest flag
* Removed unused return values
* Added custom rccl library usage
* Removed json output
* Updates to test_runner: added num_gpus field
* Address review comments
* Prepend env vars for single node, single process executions
* Added separate enums for exit and result codes
* Update configuration files
* Moved configurations to its own dir
* Address review comments
* Update tools/scripts/test_runner/README.md

Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com>

---------

Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com>
127 wiersze
2.8 KiB
JSON
127 wiersze
2.8 KiB
JSON
{
  "system_configurations": {
    "name": "rccl-test-system",
    "description": "Optional description of the system"
  },

  "paths": {
    "workdir": "${WORKDIR:-/path/to/rccl}",
    "rocm_path": "${ROCM_PATH:-/opt/rocm}",
    "mpi_path": "${MPI_PATH:-/opt/ompi}",
    "test_binary_dir": "${RCCL_TEST_BIN_DIR:-build/test}"
  },

  "env_variables": {
    "HSA_NO_SCRATCH_RECLAIM": "1",
    "NCCL_DEBUG": "WARN"
  },

  "build_configuration": {
    "cmake_options": {
      "CMAKE_BUILD_TYPE": "Release",
      "BUILD_TESTS": "ON"
    },
    "env_variables": {
      "HIPCC_COMPILE_FLAGS_APPEND": "-O2"
    },
    "parallel_jobs": 64,
    "generator": "Unix Makefiles"
  },

  "test_configurations": {
    "base_config": {
      "env_variables": {
        "NCCL_LAUNCH_MODE": "GROUP"
      },
      "args": ["--verbose"],
      "mpi_args": ["--bind-to none"]
    },

    "gtest_config": {
      "extends": "base_config",
      "is_gtest": true,
      "binary": "rccl-UnitTests",
      "num_ranks": 1,
      "num_nodes": 1,
      "num_gpus": 8,
      "timeout": 120,
      "env_variables": {
        "NCCL_DEBUG": "INFO"
      },
      "tests": [
        {
          "name": "AllReduceTest",
          "description": "Test AllReduce with specific parameters",
          "is_gtest": true,
          "binary": "rccl-UnitTests",
          "test_filter": "AllReduce.InPlace",
          "command_args": "--gtest_also_run_disabled_tests",
          "num_ranks": 1,
          "num_nodes": 1,
          "num_gpus": 4,
          "timeout": 60,
          "env_variables": {
            "NCCL_DEBUG": "TRACE"
          }
        },
        {
          "name": "BroadcastTest",
          "test_filter": "Broadcast.*"
        }
      ]
    },

    "mpi_config": {
      "extends": "base_config",
      "binary": "rccl-UnitTestsMPI",
      "num_ranks": 2,
      "num_nodes": 1,
      "timeout": 180,
      "tests": [
        {"name": "P2pTest", "test_filter": "P2pMPITest.*"},
        {"name": "ShmTest", "test_filter": "ShmMPITest.*"}
      ]
    },

    "perf_config": {
      "is_gtest": false,
      "binary": "all_reduce_perf",
      "num_ranks": 8,
      "num_nodes": 2,
      "num_gpus": 4,
      "timeout": 300,
      "tests": [
        {
          "name": "AllReducePerf",
          "command_args": "-b 8 -e 128M -f 2 -g 1"
        }
      ]
    }
  },

  "test_suites": [
    {
      "name": "unit_tests",
      "description": "Unit tests with GTest",
      "config": "gtest_config",
      "enabled": true,
      "num_ranks": 1,
      "num_nodes": 1,
      "num_gpus": 8,
      "timeout": 200,
      "env_variables": {
        "NCCL_DEBUG_SUBSYS": "INIT"
      }
    },
    {
      "name": "mpi_tests",
      "config": "mpi_config"
    },
    {
      "name": "perf_tests",
      "config": "perf_config",
      "enabled": false
    }
  ]
}