From 30d36661c253541955e8f7644b5443e5e9fab7c0 Mon Sep 17 00:00:00 2001
From: Atul Kulkarni <atul.kulkarni@amd.com>
Date: Thu, 8 Jan 2026 08:04:41 -0800
Subject: [PATCH] Adds Python-based test runner for RCCL (#2034)

* Added python test runner to execute rccl tests

* Disabled capture output to avoid hangs

* Add RCCL_TEST_MPI_HOSTFILE env var to get the hostfile

* Converted test_type to boolean gtest flag

* Removed unused return values

* Added custom rccl library usage

* Removed json output

* Updates to test_runner: added num_gpus field

* Address review comments

* Prepend env vars for single node, single process executions

* Added separate enums for exit and result codes

* Update configuration files

* Moved configurations to its own dir

* Address review comments

* Update tools/scripts/test_runner/README.md

Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com>

---------

Co-authored-by: Corey Derochie <161367113+corey-derochie-amd@users.noreply.github.com>

[ROCm/rccl commit: 0c2c61d2f1c112b1eede47f119dcf5593ff0db2c]
---
 .../rccl/tools/scripts/test_runner/README.md  | 984 ++++++++++++++++++
 .../configs/mi300x_mellanox_ib.json           | 506 +++++++++
 .../test_runner/configs/rccl_perf_tests.json  | 458 ++++++++
 .../configs/test_config_sample.json           | 126 +++
 .../tools/scripts/test_runner/lib/__init__.py |  20 +
 .../scripts/test_runner/lib/test_config.py    | 401 +++++++
 .../scripts/test_runner/lib/test_executor.py  | 858 +++++++++++++++
 .../scripts/test_runner/lib/test_parser.py    | 167 +++
 .../tools/scripts/test_runner/test_runner.py  | 124 +++
 9 files changed, 3644 insertions(+)
 create mode 100644 projects/rccl/tools/scripts/test_runner/README.md
 create mode 100644 projects/rccl/tools/scripts/test_runner/configs/mi300x_mellanox_ib.json
 create mode 100644 projects/rccl/tools/scripts/test_runner/configs/rccl_perf_tests.json
 create mode 100644 projects/rccl/tools/scripts/test_runner/configs/test_config_sample.json
 create mode 100644 projects/rccl/tools/scripts/test_runner/lib/__init__.py
 create mode 100644 projects/rccl/tools/scripts/test_runner/lib/test_config.py
 create mode 100644 projects/rccl/tools/scripts/test_runner/lib/test_executor.py
 create mode 100644 projects/rccl/tools/scripts/test_runner/lib/test_parser.py
 create mode 100755 projects/rccl/tools/scripts/test_runner/test_runner.py

diff --git a/projects/rccl/tools/scripts/test_runner/README.md b/projects/rccl/tools/scripts/test_runner/README.md
new file mode 100644
index 0000000000..7d256a6ebe
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/README.md
@@ -0,0 +1,984 @@
+# RCCL Test Runner
+
+A Python-based test runner focused on RCCL unit and functional tests, with hierarchical configuration support and integrated code coverage reporting. It is also extensible to performance benchmarks, MPI tests, and custom test scripts.
+
+## Overview
+
+This test runner provides a maintainable, extensible alternative to shell-based test execution. It uses JSON configuration files with hierarchical inheritance, and integrates with LLVM code coverage tools.
+
+## Key Features
+
+- **Multiple Test Types**: Support for GTest, performance tests, and custom executables
+- **Hierarchical Configuration**: Use `"extends"` directive to inherit and merge configurations
+- **Environment Variable Management**: Global, configuration, suite, and test-specific environment variables
+- **Path Variable Expansion**: Use environment variables in paths with nested default value expansion
+- **Custom Library Support**: Use pre-built RCCL libraries from custom locations via environment variables
+- **Configurable Build System**: Customize CMake options, environment variables, and parallel jobs via config
+- **MPI Support**: Full support for multi-rank and multi-node tests
+- **Flexible Test Filtering**: Run all tests, specific test suites, or individual tests
+- **Build Integration**: Automated RCCL building with CMake
+- **Code Coverage**: Integrated LLVM coverage report generation (HTML and text)
+- **Clean Output**: Verbose MPI messages are filtered out automatically (pass `--verbose` to show them)
+- **Verbose Logging**: Detailed output for debugging and troubleshooting
+
+## Quick Start
+
+### Basic Usage
+
+```bash
+# Run with specific configuration
+python test_runner.py --config my_tests.json
+
+# Run with verbose output
+python test_runner.py --config my_tests.json --verbose
+
+# Run specific test by name
+python test_runner.py --config my_tests.json --test-name SHM_ComprehensiveWorkflow
+```
+
+### Generate Coverage Report
+
+```bash
+# Build, run tests, and generate coverage report
+python test_runner.py --config test_config_sample.json --coverage-report --verbose
+
+# Use existing build and generate coverage
+python test_runner.py --config test_config_sample.json --no-build --coverage-report
+```
+
+### Use Custom RCCL Library
+
+```bash
+# Use pre-built RCCL library from custom location
+export RCCL_LIB_PATH=/path/to/custom/rccl/build
+python test_runner.py --config test_config_sample.json
+
+# Or use RCCL_BUILD_DIR (alternative name)
+export RCCL_BUILD_DIR=/path/to/custom/rccl/build
+python test_runner.py --config test_config_sample.json
+
+# When set, build step is automatically skipped
+# --no-build is not needed
+```
+
+## Environment Variables
+
+The test runner supports the following environment variables to customize behavior:
+
+### Library and Build Configuration
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `RCCL_LIB_PATH` | Path to pre-built RCCL library directory (contains `librccl.so` and `test/` subdirectory). When set, the build step is automatically skipped. | `/path/to/rccl/build` |
+| `RCCL_BUILD_DIR` | Alternative name for `RCCL_LIB_PATH`. Either variable can be used. | `/path/to/rccl/build` |
+| `RCCL_TEST_MPI_HOSTFILE` | Path to MPI hostfile for multi-node tests. | `~/.mpi_hostfile` |
+
+### Configuration Path Variables
+
+These can be overridden via environment variables or specified in the JSON config:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `WORKDIR` | RCCL source and build directory | Current rccl repository root |
+| `ROCM_PATH` | ROCm installation path | `/opt/rocm` |
+| `MPI_PATH` | MPI installation path | System default or config-specific |
+
+### Priority Order
+
+When determining which RCCL library to use, the test runner follows this priority:
+
+1. **`RCCL_LIB_PATH` or `RCCL_BUILD_DIR` environment variable** (highest priority)
+   - Skips build automatically
+   - Must contain `librccl.so` and `test/` subdirectory
+2. **`--no-build` flag with local build**
+   - Uses local `build_debug_cov_on_tests_on/` directory
+   - Requires prior build
+3. **Default build process** (lowest priority)
+   - Builds RCCL in timestamped directory
+   - Uses CMake configuration from JSON
+
+**Example Usage:**
+
+```bash
+# Priority 1: Use custom library (build skipped automatically)
+export RCCL_LIB_PATH=/path/to/prebuilt/rccl/build
+python test_runner.py --config my_tests.json
+
+# Priority 2: Use existing local build (no new build)
+python test_runner.py --config my_tests.json --no-build
+
+# Priority 3: Fresh build (default)
+python test_runner.py --config my_tests.json
+```
+
+## Configuration File Format
+
+### Basic Structure
+
+```json
+{
+  "system_configurations": {
+    "name": "system-name",
+    "description": "System description"
+  },
+  "paths": {
+    "workdir": "/path/to/rccl",
+    "rocm_path": "/opt/rocm",
+    "mpi_path": "/path/to/mpi"
+  },
+  "env_variables": {
+    "GLOBAL_VAR": "value"
+  },
+  "test_configurations": {
+    "config_name": {
+      "env_variables": {...},
+      "tests": [...]
+    }
+  },
+  "test_suites": [
+    {
+      "name": "Test Suite Name",
+      "config": "config_name",
+      "enabled": true
+    }
+  ]
+}
+```
+
+### Environment Variable Expansion in Paths
+
+The `paths` section supports environment variable expansion, allowing you to avoid hardcoding paths and make configurations portable across different systems.
+
+#### Supported Syntax
+
+```json
+{
+  "paths": {
+    "workdir": "${HOME}/code/rccl",
+    "rocm_path": "$ROCM_PATH",
+    "mpi_path": "${MPI_PATH:-/opt/mpi}"
+  }
+}
+```
+
+**Syntax Options:**
+- `${VAR}` - Expands to the value of `VAR`, left as-is if undefined
+- `$VAR` - Expands to the value of `VAR`, left as-is if undefined
+- `${VAR:-default}` - Expands to the value of `VAR`, or `default` if undefined (bash-style default)
+
+#### Examples
+
+```json
+{
+  "paths": {
+    "workdir": "${WORKDIR:-${HOME}/code/rti/scripts/rccl}",
+    "rocm_path": "${ROCM_PATH:-/opt/rocm}",
+    "mpi_path": "${MPI_PATH:-${HOME}/softwares/ompi}"
+  }
+}
+```
+
+**Usage:**
+```bash
+# Use environment variables
+export WORKDIR=/custom/path/to/rccl
+export ROCM_PATH=/opt/rocm-6.0
+export MPI_PATH=/usr/local/mpi
+
+python test_runner.py --config test_config_sample.json
+
+# Or use defaults (no environment variables set)
+python test_runner.py --config test_config_sample.json
+```
+
+**Benefits:**
+- **Portability**: Share configurations across different systems
+- **Flexibility**: Override paths without modifying config files
+- **CI/CD**: Easy integration with build systems and pipelines
+- **Multi-user**: Same config works for different user environments
+
+### Test Types Supported
+
+The test runner uses the `is_gtest` boolean flag to distinguish between test types:
+
+- **`is_gtest: true`** (default) - GTest-based unit tests using `--gtest_filter` syntax
+- **`is_gtest: false`** - Non-GTest tests (performance benchmarks, custom scripts, etc.)
+
+This simplified approach supports all test categories while reducing configuration complexity.
+
+#### GTest Tests (`is_gtest: true`)
+
+Used for unit tests with GTest framework. The `test_filter` field uses GTest filter syntax.
+
+```json
+{
+  "name": "AllReduce_InPlace",
+  "description": "Test AllReduce collective operation with in-place buffers",
+  "is_gtest": true,
+  "binary": "rccl-UnitTests",
+  "test_filter": "AllReduce.InPlace",
+  "num_ranks": 1,
+  "num_nodes": 1,
+  "timeout": 60
+}
+```
+
+**Command generated:**
+```bash
+./rccl-UnitTests --gtest_filter=AllReduce.InPlace
+```
+
+#### Performance Tests (`is_gtest: false`)
+
+Used for performance benchmarks. Arguments are passed directly without GTest syntax.
+
+```json
+{
+  "name": "Perf_Bandwidth",
+  "description": "Bandwidth benchmark for AllReduce",
+  "is_gtest": false,
+  "binary": "all_reduce_perf",
+  "command_args": "-b 8 -e 128M -f 2",
+  "num_ranks": 2,
+  "num_nodes": 1,
+  "timeout": 300
+}
+```
+
+**Command generated:**
+```bash
+mpirun -np 2 ./all_reduce_perf -b 8 -e 128M -f 2
+```
+
+#### Custom Scripts (`is_gtest: false`)
+
+Used for custom validation scripts or any non-GTest executables.
+
+```json
+{
+  "name": "Custom_Validation",
+  "description": "Custom GPU validation script",
+  "is_gtest": false,
+  "binary": "validate_gpus.sh",
+  "command_args": "--full-check --verbose",
+  "num_ranks": 1,
+  "num_nodes": 1,
+  "timeout": 120
+}
+```
+
+**Command generated:**
+```bash
+./validate_gpus.sh --full-check --verbose
+```
+
+**Key Differences:**
+
+| Feature | `is_gtest: true` | `is_gtest: false` |
+|---------|------------------|-------------------|
+| Test framework | GTest (Google Test) | Any executable |
+| Filter syntax | `--gtest_filter=<pattern>` | Plain arguments |
+| `test_filter` field | GTest pattern (e.g., `Suite.Test*`) | Passed as plain argument |
+| `command_args` field | Appended after filter | Primary argument method |
+| Typical use cases | Unit tests, functional tests | Performance tests, custom scripts |
+
+### Test Definition Fields
+
+| Field | Required | Type | Description |
+|-------|----------|------|-------------|
+| `name` | Yes | string | Unique test identifier |
+| `description` | Recommended | string | Human-readable test description |
+| `is_gtest` | Optional | boolean | Whether test uses GTest framework (default: true). Set to false for perf or custom tests |
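+| `binary` | Yes | string | Test binary name or path (a bare name is resolved relative to `<build_dir>/test/`); may be inherited from configuration- or suite-level defaults |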
+| `test_filter` | Optional | string | Test filter (GTest filter syntax for gtest, plain argument for non-gtest) |
+| `command_args` | Optional | string | Additional command-line arguments |
+| `num_ranks` | Optional | integer | Number of MPI ranks (default: 1) |
+| `num_nodes` | Optional | integer | Number of nodes (default: 1) |
+| `num_gpus` | Optional | integer | GPUs per node - controls rank distribution (default: 8) |
+| `timeout` | Optional | integer | Timeout in seconds (0 = unlimited) |
+| `env_variables` | Optional | object | Test-specific environment variables |
+
+### Configuration Inheritance
+
+Use the `"extends"` directive to inherit from parent configurations:
+
+```json
+{
+  "test_configurations": {
+    "base": {
+      "env_variables": {
+        "NCCL_DEBUG": "INFO"
+      }
+    },
+    "shm_tests": {
+      "extends": "base",
+      "env_variables": {
+        "NCCL_SHM_DISABLE": "0"
+      },
+      "tests": [...]
+    },
+    "advanced_shm": {
+      "extends": ["base", "shm_tests"],
+      "env_variables": {
+        "NCCL_SHM_USE_CUDA_MEMCPY": "1"
+      }
+    }
+  }
+}
+```
+
+### Hierarchical Defaults
+
+To reduce repetition, you can specify default values at multiple levels with a clear override hierarchy:
+
+**Priority Order (highest to lowest):**
+1. **Individual test** - highest priority, overrides everything
+2. **Test suite level** - overrides configuration defaults
+3. **Configuration level** - base defaults for all tests in that config
+4. **Built-in defaults** - system fallback values
+
+**Supported default fields:** `is_gtest`, `binary`, `num_ranks`, `num_nodes`, `num_gpus`, `timeout`
+
+#### Example with Three-Level Hierarchy
+
+```jsonc
+{
+  "test_configurations": {
+    "p2p_tests": {
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsMPI",
+      "num_ranks": 2,
+      "num_nodes": 1,
+      "num_gpus": 2,
+      "timeout": 120,
+      "env_variables": {
+        "NCCL_P2P_DISABLE": "0"
+      },
+      "tests": [
+        {
+          "name": "P2P_Basic",
+          "description": "Basic P2P test",
+          "test_filter": "P2pMPITest.Basic"
+          // Uses config defaults: is_gtest=true, binary, num_ranks=2, num_nodes=1, num_gpus=2, timeout=120
+        },
+        {
+          "name": "P2P_LongRunning",
+          "description": "Long-running P2P test",
+          "test_filter": "P2pMPITest.LongRunning",
+          "timeout": 300
+          // Overrides timeout=300, inherits other config defaults
+        }
+      ]
+    }
+  },
+  "test_suites": [
+    {
+      "name": "P2P_Basic_Suite",
+      "config": "p2p_tests",
+      "num_ranks": 4,
+      "num_gpus": 4,
+      "timeout": 180
+      // Suite-level: overrides config's num_ranks, num_gpus, and timeout
+      // Tests in this suite will use: num_ranks=4, num_gpus=4, timeout=180 (except P2P_LongRunning, whose own timeout=300 takes priority)
+    },
+    {
+      "name": "P2P_Stress_Suite",
+      "config": "p2p_tests",
+      "num_nodes": 2,
+      "num_ranks": 4,
+      "num_gpus": 2,
+      "timeout": 600
+      // Suite-level: overrides config's num_nodes, num_ranks, num_gpus, and timeout
+      // Tests in this suite will use: num_nodes=2, num_ranks=4, num_gpus=2, timeout=600 (except P2P_LongRunning, whose own timeout=300 takes priority)
+    }
+  ]
+}
+```
+
+**Benefits:**
+- **Less Repetition**: Define common values once
+- **Easier Maintenance**: Update defaults in one place
+- **Flexible Overrides**: Tests can still customize any field
+- **Cleaner Config**: Shorter, more readable test definitions
+
+## Command-Line Options
+
+```
+Required:
+  -c, --config CONFIG       Test configuration file (JSON format)
+
+Optional:
+  -v, --verbose             Enable verbose output (shows build paths, commands, etc.)
+  -o, --output DIR          Output directory for logs and reports
+  --test-name NAME          Run only specific test by name
+  --no-build                Skip build step and use existing build
+  --skip-tests              Skip test execution (useful with --coverage-report)
+  --coverage-report         Generate code coverage report (HTML + text)
+  --overwrite               Overwrite previous workspace directories
+  --report-suffix SUFFIX    Suffix for report directory (default: blank)
+  -h, --help                Show help message and exit
+```
+
+## Code Coverage Reports
+
+The test runner integrates with LLVM tools to generate comprehensive code coverage reports.
+
+### Generating Coverage
+
+```bash
+# Build and test with coverage (recommended)
+python test_runner.py --config test_config_sample.json --coverage-report --verbose
+
+# Generate report from existing profraw files
+python test_runner.py --config test_config_sample.json --no-build --skip-tests --coverage-report
+```
+
+### Coverage Output
+
+When `--coverage-report` is specified, the runner generates:
+
+1. **HTML Report**: Visual coverage report in `reports/` directory
+   - View with: `firefox reports/index.html`
+   - Shows line-by-line coverage with syntax highlighting
+
+2. **Text Report**: Function-level coverage summary
+   - Location: `reports/function_coverage_report.txt`
+   - Includes per-function and per-file statistics
+
+### Coverage Implementation Details
+
+- Uses LLVM instrumentation (`-fprofile-instr-generate -fcoverage-mapping`)
+- Collects `.profraw` files during test execution
+- Merges profiles with `llvm-profdata`
+- Generates reports with `llvm-cov show` and `llvm-cov report`
+- Filters out irrelevant files (test/, gtest, external dependencies)
+
+## Examples
+
+### Run All Enabled Test Suites
+
+```bash
+python test_runner.py --config test_config_sample.json --verbose
+```
+
+### Run Specific Test
+
+```bash
+python test_runner.py --config test_config_sample.json --test-name P2P_AllTests
+```
+
+### Skip Build (Use Existing)
+
+```bash
+python test_runner.py --config test_config_sample.json --no-build
+```
+
+### Build and Generate Coverage
+
+```bash
+# Full workflow: build, test, coverage
+python test_runner.py --config adhoc_test_config.json --coverage-report --verbose
+```
+
+### Generate Coverage from Existing Build
+
+```bash
+# Skip build, use existing profraw files
+python test_runner.py --config adhoc_test_config.json --no-build --skip-tests --coverage-report
+```
+
+### Custom Output Directory
+
+```bash
+python test_runner.py --config test_config_sample.json -o /path/to/output --verbose
+```
+
+### Run with Overwrite (Clean Previous Results)
+
+```bash
+python test_runner.py --config test_config_sample.json --overwrite --coverage-report
+```
+
+## Environment Variable Merging
+
+Environment variables are merged hierarchically (later values override earlier):
+
+1. **Global** `env_variables` (top-level in config)
+2. **Configuration** `env_variables` (test configuration level)
+3. **Test Suite** `env_variables` (suite level)
+4. **Test-specific** `env_variables` (individual test level)
+
+Example:
+```json
+{
+  "env_variables": {
+    "NCCL_DEBUG": "INFO"
+  },
+  "test_configurations": {
+    "shm_tests": {
+      "env_variables": {
+        "NCCL_SHM_DISABLE": "0"
+      },
+      "tests": [
+        {
+          "name": "SHM_Test",
+          "env_variables": {
+            "NCCL_DEBUG": "TRACE"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+Result: `NCCL_DEBUG=TRACE`, `NCCL_SHM_DISABLE=0`
+
+## Test Execution
+
+### Single-Node Tests
+
+- All ranks run on a single node
+- Multiple ranks map to different GPUs
+- Examples: SHM tests, P2P tests, unit tests
+
+```json
+{
+  "name": "SHM_Test",
+  "num_ranks": 2,
+  "num_nodes": 1
+}
+```
+
+### Multi-Node Tests
+
+- Ranks distributed across multiple nodes via MPI
+- Requires SLURM allocation or hostfile configuration
+- Use `num_gpus` to control ranks per node (default: 8)
+- Examples: NET transport tests, InfiniBand tests
+
+```json
+{
+  "name": "NET_Test_4Nodes_2GPUs",
+  "num_ranks": 8,
+  "num_nodes": 4,
+  "num_gpus": 2
+}
+```
+
+**`num_gpus` Field:**
+- Controls how many MPI ranks are placed on each node
+- Overrides hostfile `slots` specification
+- For multi-node tests, uses `--map-by ppr:{num_gpus}:node`
+- Default value: 8 (matches typical 8-GPU nodes)
+
+**Example: 2 nodes, 1 GPU per node**
+```json
+{
+  "name": "NET_Test_2Nodes_1GPU",
+  "num_ranks": 2,
+  "num_nodes": 2,
+  "num_gpus": 1
+}
+```
+Command: `mpirun -np 2 --hostfile file --map-by ppr:1:node ...`
+
+### Setting Up Multi-Node Tests
+
+**Option 1: MPI Hostfile**
+```bash
+export RCCL_TEST_MPI_HOSTFILE=/path/to/hostfile
+python test_runner.py --config net_ib_test_config.json
+```
+
+**Option 2: Default Hostfile**
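+If `RCCL_TEST_MPI_HOSTFILE` is not set, the runner falls back to `~/.mpi_hostfile`. Create it with one node per line (optionally followed by a `slots=` count):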
+```
+node01 slots=8
+node02 slots=8
+```
+
+## Advanced Features
+
+### Build Configuration (New!)
+
+Customize the RCCL build process through the `build_configuration` section in your JSON config file.
+
+#### Basic Structure
+
+```json
+{
+  "build_configuration": {
+    "cmake_options": {
+      "CMAKE_BUILD_TYPE": "Debug",
+      "ENABLE_CODE_COVERAGE": "ON",
+      "ONLY_FUNCS": "SendRecv|AllReduce"
+    },
+    "env_variables": {
+      "HIPCC_COMPILE_FLAGS_APPEND": "-g -O1"
+    },
+    "parallel_jobs": 64,
+    "generator": "Unix Makefiles"
+  }
+}
+```
+
+#### Examples
+
+**Fast Development Build (No Coverage):**
+```json
+{
+  "build_configuration": {
+    "cmake_options": {
+      "ENABLE_CODE_COVERAGE": "OFF"
+    },
+    "parallel_jobs": 128
+  }
+}
+```
+
+**Release Build:**
+```json
+{
+  "build_configuration": {
+    "cmake_options": {
+      "CMAKE_BUILD_TYPE": "Release",
+      "TRACE": "OFF",
+      "COLLTRACE": "OFF"
+    }
+  }
+}
+```
+
+**Test Specific Functions Only:**
+```json
+{
+  "build_configuration": {
+    "cmake_options": {
+      "ONLY_FUNCS": "Broadcast|Reduce"
+    }
+  }
+}
+```
+
+**All Options:**
+- `cmake_options` - Any CMake option (user values override defaults)
+- `env_variables` - Build environment variables
+- `parallel_jobs` - Number of parallel build threads (default: 64)
+- `generator` - CMake generator: "Unix Makefiles", "Ninja", etc.
+
+See `BUILD_CONFIGURATION_GUIDE.md` for complete documentation.
+
+### Enhanced Environment Variable Expansion
+
+Environment variables in the `paths` section now support **nested expansion** in default values:
+
+```json
+{
+  "paths": {
+    "workdir": "${WORKDIR:-$HOME/code/rti/scripts/rccl}",
+    "rocm_path": "${ROCM_PATH:-/opt/rocm}",
+    "mpi_path": "${MPI_PATH:-$HOME/softwares/ompi}"
+  }
+}
+```
+
+**Key Feature:** If `WORKDIR` is not set, the `$HOME` reference inside the default value `$HOME/code/rti/scripts/rccl` is expanded automatically!
+
+### Flexible Binary Paths
+
+Specify test binary locations in multiple ways for maximum flexibility:
+
+#### 1. Default (Relative to build_dir/test/)
+
+```json
+{
+  "binary": "all_reduce_perf"
+}
+```
+Result: `<workdir>/build_debug_cov_on_tests_on/test/all_reduce_perf`
+
+#### 2. Absolute Path
+
+```json
+{
+  "binary": "/opt/custom_rccl_build/test/all_reduce_perf"
+}
+```
+Result: Uses the absolute path directly
+
+#### 3. Environment Variable in Binary Name
+
+```json
+{
+  "binary": "${MY_RCCL_TESTS}/all_reduce_perf"
+}
+```
+Result: Expands `$MY_RCCL_TESTS` environment variable
+
+#### 4. Home Directory Expansion
+
+```json
+{
+  "binary": "~/my_builds/rccl/test/all_reduce_perf"
+}
+```
+Result: Expands `~` to home directory
+
+#### 5. Using test_binary_dir in Paths
+
+```json
+{
+  "paths": {
+    "test_binary_dir": "${RCCL_TEST_BIN_DIR}"
+  },
+  "test_configurations": {
+    "my_tests": {
+      "binary": "all_reduce_perf"
+    }
+  }
+}
+```
+Result: `${RCCL_TEST_BIN_DIR}/all_reduce_perf`
+
+#### 6. Using test_binary_dir in Test Config
+
+```json
+{
+  "test_configurations": {
+    "my_tests": {
+      "tests": [
+        {
+          "name": "CustomBinary",
+          "test_binary_dir": "/opt/rccl/tests",
+          "binary": "all_reduce_perf"
+        }
+      ]
+    }
+  }
+}
+```
+Result: `/opt/rccl/tests/all_reduce_perf`
+
+#### Resolution Priority Order
+
+1. **Absolute path in binary** - Highest priority
+2. **Environment variable expansion** (if results in absolute path)
+3. **test_binary_dir in test config** + binary
+4. **test_binary_dir in paths** + binary
+5. **Default:** `build_dir/test/` + binary - Lowest priority
+
+#### Use Cases
+
+- **CI/CD with pre-built binaries:** Use absolute paths or `RCCL_TEST_BIN_DIR`
+- **Multiple RCCL versions:** Different `test_binary_dir` per configuration
+- **Custom build locations:** Environment variables for flexibility
+- **Standard builds:** Use default (no configuration needed)
+
+#### Verbose Mode
+
+Use `--verbose` to see the resolved binary path:
+```bash
+python test_runner.py --config test.json --verbose
+```
+
+Output includes:
+```
+Binary:  all_reduce_perf
+Binary path: /home/user/code/rti/scripts/rccl/build_debug_cov_on_tests_on/test/all_reduce_perf
+```
+
+### Configuration Best Practices
+
+**Reduce Repetition:** Move common values to configuration level
+
+```jsonc
+{
+  "test_configurations": {
+    "p2p_tests": {
+      "timeout": 120,
+      "env_variables": {
+        "NCCL_P2P_USE_CUDA_MEMCPY": "1",
+        "NCCL_LEGACY_CUDA_REGISTER": "1"
+      },
+      "tests": [
+        {
+          "name": "Test1"
+          // Inherits timeout and env vars from config level
+        },
+        {
+          "name": "Test2",
+          "timeout": 300
+          // Overrides timeout, inherits env vars
+        }
+      ]
+    }
+  }
+}
+```
+
+**Benefits:**
+- ✅ Single source of truth for common settings
+- ✅ Easier maintenance
+- ✅ Tests can still override when needed
+- ✅ Cleaner, more readable configurations
+
+## Development and Testing
+
+### Validate Configuration
+
+```bash
+# Test JSON syntax
+python3 -m json.tool test_config_sample.json
+
+# Test configuration loading
+python3 -c "from lib.test_config import TestConfigProcessor; \
+            p = TestConfigProcessor('test_config_sample.json'); \
+            print('Configuration valid!')"
+
+# Dry run (load and validate the configuration without executing tests; add --no-build to also skip the build)
+python test_runner.py --config test_config_sample.json --skip-tests --verbose
+```
+
+### Adding New Tests
+
+1. Add test definition to appropriate configuration in JSON file
+2. Specify `is_gtest`, `description`, and required fields
+3. Test with dry run first: `--skip-tests --verbose`
+4. Run actual test: `--test-name YourTest --verbose`
+
+### Test Type Handling
+
+The test runner uses a boolean `is_gtest` flag to distinguish between test types:
+
+- **`is_gtest: true`** (default): Uses GTest framework with `--gtest_filter=<filter>` syntax
+- **`is_gtest: false`**: Runs binary with plain arguments (for performance tests, custom scripts, etc.)
+
+This simplified approach eliminates the need for multiple test type conditionals while supporting all test categories (gtest, perf, custom).
+
+## Troubleshooting
+
+### "Configuration file not found"
+- Check the path to your JSON config file
+- Use absolute paths or ensure you're in the correct directory
+- Verify file permissions
+
+### "MPI path not found"
+- Update `paths.mpi_path` in your configuration
+- Ensure MPI is installed: `which mpirun`
+- Check MPI_PATH environment variable
+
+### "Test binary not found"
+- Build first: remove `--no-build` flag
+- Check the binary name in the build's `test/` directory (e.g. `build_debug_cov_on_tests_on/test/`)
+- Verify that the CMake build completed successfully
+
+### Multi-node tests hang
+- Ensure SLURM allocation or hostfile is configured
+- Check network connectivity: `ping other_node`
+- Verify MPI can reach nodes: `mpirun -np 2 hostname`
+- Check firewall settings
+
+### CMake configuration fails
+- Check ROCm path: `ls $ROCM_PATH`
+- Verify compiler: `$ROCM_PATH/bin/amdclang++ --version`
+- Check MPI path: `ls $MPI_PATH/bin/mpirun`
+
+### Coverage report fails
+- Ensure LLVM tools are available: `which llvm-profdata llvm-cov`
+- Check for `.profraw` files in build directory
+- Verify coverage build flags were set correctly
+- Run with `--verbose` to see detailed error messages
+
+### "LLVM_PROFILE_FILE not being used"
+- Ensure `--coverage-report` flag is specified
+- Check that tests are actually executing (not skipped)
+- Verify environment variables with `--verbose`
+
+---
+
+## Appendix: Environment Variables Reference
+
+This section provides a quick reference for all environment variables supported by the test runner.
+
+### Library and Build Location
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `RCCL_LIB_PATH` | Path to pre-built RCCL library directory. Automatically skips build. | `export RCCL_LIB_PATH=/path/to/rccl/build` |
+| `RCCL_BUILD_DIR` | Alternative name for `RCCL_LIB_PATH`. | `export RCCL_BUILD_DIR=/home/user/rccl_builds/debug` |
+
+**Requirements**: Directory must contain `librccl.so` and `test/` subdirectory.
+
+### Configuration Paths
+
+These override the paths specified in the JSON configuration file:
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `WORKDIR` | RCCL source and build directory | `export WORKDIR=/home/user/code/rccl` |
+| `ROCM_PATH` | ROCm installation path | `export ROCM_PATH=/opt/rocm-6.0` |
+| `MPI_PATH` | MPI installation path | `export MPI_PATH=/usr/local/openmpi` |
+
+### Test Execution
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `RCCL_TEST_MPI_HOSTFILE` | Path to MPI hostfile for multi-node tests | `export RCCL_TEST_MPI_HOSTFILE=~/.mpi_hostfile` |
+
+**Note**: Falls back to `~/.mpi_hostfile` if not set. For SLURM environments, hostfile is auto-generated from `SLURM_NODELIST`.
+
+### Test-Specific Variables
+
+These can be set globally or specified in the JSON configuration per test:
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `NCCL_DEBUG` | NCCL debug level (VERSION, WARN, INFO, TRACE) | `export NCCL_DEBUG=INFO` |
+| `NCCL_DEBUG_SUBSYS` | NCCL debug subsystems to enable | `export NCCL_DEBUG_SUBSYS=INIT,COLL,NET` |
+| `HSA_NO_SCRATCH_RECLAIM` | Disable HIP scratch memory reclaim | `export HSA_NO_SCRATCH_RECLAIM=1` |
+| `NCCL_LAUNCH_MODE` | NCCL launch mode (GROUP, PARALLEL) | `export NCCL_LAUNCH_MODE=GROUP` |
+
+### Coverage and Profiling
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `LLVM_PROFILE_FILE` | LLVM coverage profile output pattern | `export LLVM_PROFILE_FILE=rccl_%p_%m.profraw` |
+
+**Note**: Automatically set by test runner to prevent collisions. Manual override not recommended.
+
+### Complete Example
+
+```bash
+#!/bin/bash
+# Configure paths
+export WORKDIR=/home/user/code/rccl
+export ROCM_PATH=/opt/rocm-6.0
+export MPI_PATH=/usr/local/openmpi
+
+# Use pre-built library
+export RCCL_LIB_PATH=/home/user/rccl_builds/instrumented
+
+# Configure MPI
+export RCCL_TEST_MPI_HOSTFILE=~/.mpi_hostfile
+
+# Enable debug output
+export NCCL_DEBUG=INFO
+export NCCL_DEBUG_SUBSYS=INIT,COLL,NET
+
+# Run tests
+python test_runner.py --config my_tests.json --verbose
+```
+
+### Variable Priority
+
+When the same configuration can be specified in multiple places, the priority is:
+
+1. **Environment variables** (highest priority)
+2. **Test-specific configuration** (in JSON)
+3. **Test suite configuration** (in JSON)
+4. **Test configuration defaults** (in JSON)
+5. **Built-in defaults** (lowest priority)
+
+**Example**: If `ROCM_PATH` is set as an environment variable, it overrides the `rocm_path` value in the JSON configuration file.
+
diff --git a/projects/rccl/tools/scripts/test_runner/configs/mi300x_mellanox_ib.json b/projects/rccl/tools/scripts/test_runner/configs/mi300x_mellanox_ib.json
new file mode 100644
index 0000000000..33041689e8
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/configs/mi300x_mellanox_ib.json
@@ -0,0 +1,506 @@
+{
+  "system_configurations": {
+    "name": "RCCL-Tests-MI300X-Mellanox-IB",
+    "description": "Comprehensive RCCL Test Configuration - All Tests"
+  },
+  "paths": {
+    "workdir": "${WORKDIR:-$PWD}",
+    "rocm_path": "${ROCM_PATH:-/opt/rocm}",
+    "mpi_path": "${MPI_PATH:-/opt/ompi}"
+  },
+  "env_variables": {
+    "HSA_NO_SCRATCH_RECLAIM": "1",
+    "NCCL_SOCKET_IFNAME": "eth0,eth1",
+    "NCCL_DEBUG": "INFO"
+  },
+  "build_configuration": {
+    "cmake_options": {
+      "CMAKE_BUILD_TYPE": "Debug",
+      "ENABLE_CODE_COVERAGE": "ON",
+      "BUILD_TESTS": "ON",
+      "BUILD_LOCAL_GPU_TARGET_ONLY": "ON",
+      "TRACE": "ON",
+      "COLLTRACE": "ON"
+    },
+    "env_variables": {
+      "HIPCC_COMPILE_FLAGS_APPEND": "-g -Wno-format-nonliteral -Xarch_host -fprofile-instr-generate -Xarch_host -fcoverage-mapping -parallel-jobs=16"
+    },
+    "parallel_jobs": 64,
+    "generator": "Unix Makefiles"
+  },
+  "test_configurations": {
+    "default": {
+      "env_variables": {
+        "NCCL_LAUNCH_MODE": "GROUP"
+      }
+    },
+    "shm_comprehensive": {
+      "extends": "default",
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsMPI",
+      "num_ranks": 2,
+      "num_nodes": 1,
+      "num_gpus": 2,
+      "timeout": 120,
+      "env_variables": {
+        "NCCL_SHM_DISABLE": "0",
+        "NCCL_SHM_USE_CUDA_MEMCPY": "1"
+      },
+      "tests": [
+        {
+          "name": "SHM_ComprehensiveWorkflow",
+          "description": "Comprehensive workflow test for shared memory transport",
+          "test_filter": "ShmMPITest.ShmWorkflow"
+        },
+        {
+          "name": "SHM_CEMemcpy_SendSide",
+          "description": "Shared memory test with compute engine memcpy on send side",
+          "test_filter": "ShmMPITest.ShmWithMemcpyTest",
+          "timeout": 180,
+          "env_variables": {
+            "NCCL_SHM_MEMCPY_MODE": "1",
+            "NCCL_SHM_LOCALITY": "1"
+          }
+        },
+        {
+          "name": "SHM_CEMemcpy_RecvSide",
+          "description": "Shared memory test with compute engine memcpy on receive side",
+          "test_filter": "ShmMPITest.ShmWithMemcpyTest",
+          "env_variables": {
+            "NCCL_SHM_MEMCPY_MODE": "2",
+            "NCCL_SHM_LOCALITY": "2"
+          }
+        },
+        {
+          "name": "SHM_CEMemcpy_BothSides",
+          "description": "Shared memory test with compute engine memcpy on both send and receive sides using simple protocol",
+          "test_filter": "ShmMPITest.ShmWithMemcpyTest",
+          "env_variables": {
+            "NCCL_PROTO": "SIMPLE",
+            "NCCL_SHM_MEMCPY_MODE": "3",
+            "NCCL_SHM_LOCALITY": "1"
+          }
+        },
+        {
+          "name": "SHM_AllTests",
+          "description": "All shared memory transport tests",
+          "test_filter": "ShmMPITest.*"
+        }
+      ]
+    },
+    "p2p_comprehensive": {
+      "extends": "default",
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsMPI",
+      "num_ranks": 2,
+      "num_nodes": 1,
+      "num_gpus": 2,
+      "timeout": 120,
+      "env_variables": {
+        "NCCL_P2P_USE_CUDA_MEMCPY": "1",
+        "NCCL_LEGACY_CUDA_REGISTER": "1"
+      },
+      "tests": [
+        {
+          "name": "P2P_Workflow",
+          "description": "Peer-to-peer transport workflow test between two GPUs",
+          "test_filter": "P2pMPITest.P2pWorkflow",
+          "env_variables": {
+            "NCCL_P2P_DISABLE": "0"
+          }
+        },
+        {
+          "name": "P2P_WithMemcpy",
+          "description": "Peer-to-peer test with CUDA memcpy and legacy buffer registration",
+          "test_filter": "P2pMPITest.P2pWithMemcpyTest"
+        },
+        {
+          "name": "P2P_SendRecvRegistration",
+          "description": "Test peer-to-peer send/receive buffer registration mechanisms",
+          "test_filter": "P2pMPITest.P2pSendRecvRegistrationTest"
+        },
+        {
+          "name": "P2P_IpcReg_VerySmallBuffer",
+          "description": "Test P2P IPC buffer registration with very small buffer sizes and SHM disabled",
+          "test_filter": "P2pMPITest.P2pIpcBufferRegistration_VerySmallBuffer",
+          "env_variables": {
+            "NCCL_SHM_DISABLE": "1",
+            "NCCL_LOCAL_REGISTER": "1"
+          }
+        },
+        {
+          "name": "P2P_AllTests",
+          "description": "All peer-to-peer transport tests",
+          "test_filter": "P2pMPITest.*"
+        }
+      ]
+    },
+    "net_transport_eth_multinode": {
+      "extends": "default",
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsMPI",
+      "num_ranks": 2,
+      "num_nodes": 2,
+      "num_gpus": 1,
+      "env_variables": {
+        "NCCL_NET_SHARED_COMMS": "1",
+        "NCCL_NET_SHARED_BUFFERS": "1",
+        "NCCL_IB_DISABLE": "0"
+      },
+      "tests": [
+        {
+          "name": "NET_AllTests_2Nodes_ETH",
+          "description": "All network transport tests over Ethernet across two nodes",
+          "test_filter": "NetTransportMPITest.*",
+          "timeout": 600
+        },
+        {
+          "name": "NET_MultipleBufferSizes_2Nodes",
+          "description": "Network transport test with multiple buffer sizes across two nodes",
+          "test_filter": "NetTransportMPITest.MultipleBufferSizesTest",
+          "timeout": 180
+        },
+        {
+          "name": "NET_NetGraphRegister_2Nodes",
+          "description": "Network transport test with graph buffer registration across two nodes",
+          "test_filter": "NetTransportMPITest.NetGraphRegisterBufferTest",
+          "timeout": 120,
+          "env_variables": {
+            "NCCL_GRAPH_REGISTER": "1"
+          }
+        }
+      ]
+    },
+    "net_ib_base": {
+      "extends": "default",
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsMPI",
+      "num_ranks": 2,
+      "num_nodes": 1,
+      "num_gpus": 2,
+      "env_variables": {
+        "NCCL_DMABUF_ENABLE": "1"
+      }
+    },
+    "net_ib_initialization": {
+      "extends": "net_ib_base",
+      "timeout": 60,
+      "tests": [
+        {
+          "name": "NetIB_Init_Plugin",
+          "description": "Initialize InfiniBand network plugin and verify basic setup",
+          "test_filter": "NetIbMPITest.InitializePlugin"
+        },
+        {
+          "name": "NetIB_Init_GetDeviceCount",
+          "description": "Query and validate InfiniBand device count",
+          "test_filter": "NetIbMPITest.GetDeviceCount"
+        }
+      ]
+    },
+    "net_ib_properties": {
+      "extends": "net_ib_base",
+      "timeout": 60,
+      "tests": [
+        {
+          "name": "NetIB_Props_GetProperties",
+          "description": "Query InfiniBand device properties and capabilities",
+          "test_filter": "NetIbMPITest.GetDeviceProperties"
+        },
+        {
+          "name": "NetIB_Props_InvalidDevice",
+          "description": "Test error handling when querying properties of invalid InfiniBand device",
+          "test_filter": "NetIbMPITest.GetDevicePropertiesInvalidDevice"
+        }
+      ]
+    },
+    "net_ib_memory": {
+      "extends": "net_ib_base",
+      "timeout": 120,
+      "tests": [
+        {
+          "name": "NetIB_Mem_RegisterHost",
+          "description": "Test InfiniBand registration of host memory buffers",
+          "test_filter": "NetIbMPITest.RegisterHostMemory"
+        },
+        {
+          "name": "NetIB_Mem_RegisterGpu",
+          "description": "Test InfiniBand registration of GPU device memory buffers",
+          "test_filter": "NetIbMPITest.RegisterGpuMemory"
+        }
+      ]
+    },
+    "net_ib_transfer": {
+      "extends": "net_ib_base",
+      "env_variables": {
+        "NCCL_DEBUG": "TRACE",
+        "NCCL_DEBUG_SUBSYS": "NET,INIT"
+      },
+      "tests": [
+        {
+          "name": "NetIB_Xfer_SimpleSendRecv",
+          "description": "Basic InfiniBand send/receive data transfer test",
+          "test_filter": "NetIbMPITest.SimpleSendRecv",
+          "timeout": 180,
+          "env_variables": {
+            "RCCL_MPI_LOG_ALL_RANKS": "1"
+          }
+        },
+        {
+          "name": "NetIB_Xfer_MultipleSizes",
+          "description": "InfiniBand data transfer with multiple buffer sizes",
+          "test_filter": "NetIbMPITest.SendRecvMultipleSizes",
+          "timeout": 300
+        },
+        {
+          "name": "NetIB_Stress_LargeTransfer",
+          "description": "Stress test for large data transfers over InfiniBand",
+          "test_filter": "NetIbMPITest.LargeTransfer",
+          "timeout": 300
+        }
+      ]
+    },
+    "unit_tests_fixtures": {
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsFixtures",
+      "num_ranks": 1,
+      "num_nodes": 1,
+      "num_gpus": 8,
+      "timeout": 0,
+      "tests": [
+        {
+          "name": "NetIb_Debug",
+          "description": "InfiniBand unit tests with debug output using test fixtures",
+          "test_filter": "NetIbTests.*",
+          "env_variables": {
+            "NCCL_SOCKET_IFNAME": "eth1"
+          }
+        },
+        {
+          "name": "Rcclwrap_All",
+          "description": "RCCL wrapper API unit tests with trace-level debugging",
+          "test_filter": "Rcclwrap.*",
+          "env_variables": {
+            "NCCL_DEBUG": "TRACE"
+          }
+        },
+        {
+          "name": "Fixtures_All",
+          "description": "All Fixtures tests",
+          "env_variables": {
+            "NCCL_DEBUG": "TRACE"
+          }
+        }
+      ]
+    },
+    "unit_tests_standard": {
+      "is_gtest": true,
+      "binary": "rccl-UnitTests",
+      "num_ranks": 1,
+      "num_nodes": 1,
+      "num_gpus": 8,
+      "timeout": 0,
+      "env_variables": {
+        "NCCL_DEBUG": ""
+      },
+      "tests": [
+        {"name": "AllReduce.OutOfPlace", "description": "AllReduce out-of-place", "test_filter": "AllReduce.OutOfPlace"},
+        {"name": "AllReduce.OutOfPlaceGraph", "description": "AllReduce out-of-place graph", "test_filter": "AllReduce.OutOfPlaceGraph"},
+        {"name": "AllReduce.InPlace", "description": "AllReduce in-place", "test_filter": "AllReduce.InPlace"},
+        {"name": "AllReduce.InPlaceGraph", "description": "AllReduce in-place graph", "test_filter": "AllReduce.InPlaceGraph"},
+        {"name": "AllReduce.ManagedMem", "description": "AllReduce managed memory", "test_filter": "AllReduce.ManagedMem"},
+        {"name": "AllReduce.Channels", "description": "AllReduce channels", "test_filter": "AllReduce.Channels"},
+        {"name": "AllReduce.ManagedMemGraph", "description": "AllReduce managed memory graph", "test_filter": "AllReduce.ManagedMemGraph"},
+        {"name": "AllReduce.PreMultScalar", "description": "AllReduce pre-mult scalar", "test_filter": "AllReduce.PreMultScalar"},
+        {"name": "AllReduce.UserBufferRegistration", "description": "AllReduce user buffer registration", "test_filter": "AllReduce.UserBufferRegistration"},
+        {"name": "AllReduce.ManagedMemUserBufferRegistration", "description": "AllReduce managed memory user buffer registration", "test_filter": "AllReduce.ManagedMemUserBufferRegistration"},
+        {"name": "AllGather.OutOfPlace", "description": "AllGather out-of-place", "test_filter": "AllGather.OutOfPlace"},
+        {"name": "AllGather.OutOfPlaceGraph", "description": "AllGather out-of-place graph", "test_filter": "AllGather.OutOfPlaceGraph"},
+        {"name": "AllGather.InPlace", "description": "AllGather in-place", "test_filter": "AllGather.InPlace"},
+        {"name": "AllGather.InPlaceGraph", "description": "AllGather in-place graph", "test_filter": "AllGather.InPlaceGraph"},
+        {"name": "AllGather.ManagedMem", "description": "AllGather managed memory", "test_filter": "AllGather.ManagedMem"},
+        {"name": "AllGather.ManagedMemGraph", "description": "AllGather managed memory graph", "test_filter": "AllGather.ManagedMemGraph"},
+        {"name": "AllGather.UserBufferRegistration", "description": "AllGather user buffer registration", "test_filter": "AllGather.UserBufferRegistration"},
+        {"name": "AllGather.ManagedMemUserBufferRegistration", "description": "AllGather managed memory user buffer registration", "test_filter": "AllGather.ManagedMemUserBufferRegistration"},
+        {"name": "AllToAll.OutOfPlace", "description": "AllToAll out-of-place", "test_filter": "AllToAll.OutOfPlace"},
+        {"name": "AllToAll.OutOfPlaceGraph", "description": "AllToAll out-of-place graph", "test_filter": "AllToAll.OutOfPlaceGraph"},
+        {"name": "AllToAll.ManagedMem", "description": "AllToAll managed memory", "test_filter": "AllToAll.ManagedMem"},
+        {"name": "AllToAll.ManagedMemGraph", "description": "AllToAll managed memory graph", "test_filter": "AllToAll.ManagedMemGraph"},
+        {"name": "AllToAllv.OutOfPlace", "description": "AllToAllv out-of-place", "test_filter": "AllToAllv.OutOfPlace"},
+        {"name": "AllToAllv.OutOfPlaceGraph", "description": "AllToAllv out-of-place graph", "test_filter": "AllToAllv.OutOfPlaceGraph"},
+        {"name": "Broadcast.OutOfPlace", "description": "Broadcast out-of-place", "test_filter": "Broadcast.OutOfPlace"},
+        {"name": "Broadcast.OutOfPlaceGraph", "description": "Broadcast out-of-place graph", "test_filter": "Broadcast.OutOfPlaceGraph"},
+        {"name": "Broadcast.InPlace", "description": "Broadcast in-place", "test_filter": "Broadcast.InPlace"},
+        {"name": "Broadcast.InPlaceGraph", "description": "Broadcast in-place graph", "test_filter": "Broadcast.InPlaceGraph"},
+        {"name": "Broadcast.ManagedMem", "description": "Broadcast managed memory", "test_filter": "Broadcast.ManagedMem"},
+        {"name": "Broadcast.ManagedMemGraph", "description": "Broadcast managed memory graph", "test_filter": "Broadcast.ManagedMemGraph"},
+        {"name": "Gather.OutOfPlace", "description": "Gather out-of-place", "test_filter": "Gather.OutOfPlace"},
+        {"name": "Gather.OutOfPlaceGraph", "description": "Gather out-of-place graph", "test_filter": "Gather.OutOfPlaceGraph"},
+        {"name": "Gather.InPlace", "description": "Gather in-place", "test_filter": "Gather.InPlace"},
+        {"name": "Gather.InPlaceGraph", "description": "Gather in-place graph", "test_filter": "Gather.InPlaceGraph"},
+        {"name": "Gather.ManagedMem", "description": "Gather managed memory", "test_filter": "Gather.ManagedMem"},
+        {"name": "Gather.ManagedMemGraph", "description": "Gather managed memory graph", "test_filter": "Gather.ManagedMemGraph"},
+        {"name": "Scatter.OutOfPlace", "description": "Scatter out-of-place", "test_filter": "Scatter.OutOfPlace"},
+        {"name": "Scatter.OutOfPlaceGraph", "description": "Scatter out-of-place graph", "test_filter": "Scatter.OutOfPlaceGraph"},
+        {"name": "Scatter.InPlace", "description": "Scatter in-place", "test_filter": "Scatter.InPlace"},
+        {"name": "Scatter.InPlaceGraph", "description": "Scatter in-place graph", "test_filter": "Scatter.InPlaceGraph"},
+        {"name": "Scatter.ManagedMem", "description": "Scatter managed memory", "test_filter": "Scatter.ManagedMem"},
+        {"name": "Scatter.ManagedMemGraph", "description": "Scatter managed memory graph", "test_filter": "Scatter.ManagedMemGraph"},
+        {"name": "Reduce.OutOfPlace", "description": "Reduce out-of-place", "test_filter": "Reduce.OutOfPlace"},
+        {"name": "Reduce.OutOfPlaceGraph", "description": "Reduce out-of-place graph", "test_filter": "Reduce.OutOfPlaceGraph"},
+        {"name": "Reduce.InPlace", "description": "Reduce in-place", "test_filter": "Reduce.InPlace"},
+        {"name": "Reduce.InPlaceGraph", "description": "Reduce in-place graph", "test_filter": "Reduce.InPlaceGraph"},
+        {"name": "Reduce.ManagedMem", "description": "Reduce managed memory", "test_filter": "Reduce.ManagedMem"},
+        {"name": "Reduce.ManagedMemGraph", "description": "Reduce managed memory graph", "test_filter": "Reduce.ManagedMemGraph"},
+        {"name": "ReduceScatter.OutOfPlace", "description": "ReduceScatter out-of-place", "test_filter": "ReduceScatter.OutOfPlace"},
+        {"name": "ReduceScatter.OutOfPlaceGraph", "description": "ReduceScatter out-of-place graph", "test_filter": "ReduceScatter.OutOfPlaceGraph"},
+        {"name": "ReduceScatter.InPlace", "description": "ReduceScatter in-place", "test_filter": "ReduceScatter.InPlace"},
+        {"name": "ReduceScatter.InPlaceGraph", "description": "ReduceScatter in-place graph", "test_filter": "ReduceScatter.InPlaceGraph"},
+        {"name": "ReduceScatter.ManagedMem", "description": "ReduceScatter managed memory", "test_filter": "ReduceScatter.ManagedMem"},
+        {"name": "ReduceScatter.ManagedMemGraph", "description": "ReduceScatter managed memory graph", "test_filter": "ReduceScatter.ManagedMemGraph"},
+        {"name": "SendRecv.SinglePairs", "description": "SendRecv single pairs", "test_filter": "SendRecv.SinglePairs"},
+        {"name": "SendRecv.UserBufferRegister", "description": "SendRecv user buffer register", "test_filter": "SendRecv.UserBufferRegister"},
+        {"name": "GroupCall.Identical", "description": "GroupCall identical", "test_filter": "GroupCall.Identical"},
+        {"name": "GroupCall.Different", "description": "GroupCall different", "test_filter": "GroupCall.Different"},
+        {"name": "GroupCall.Multistream", "description": "GroupCall multistream", "test_filter": "GroupCall.Multistream"},
+        {"name": "GroupCall.MixedDataType", "description": "GroupCall mixed data type", "test_filter": "GroupCall.MixedDataType"},
+        {"name": "GroupCall.MultiGroupCall", "description": "GroupCall multi group call", "test_filter": "GroupCall.MultiGroupCall"},
+        {"name": "NonBlocking.SingleCalls", "description": "NonBlocking single calls", "test_filter": "NonBlocking.SingleCalls"},
+        {"name": "CommTests.Sorter", "description": "CommTests sorter", "test_filter": "CommTests.Sorter"},
+        {"name": "Enqueue", "description": "Enqueue operation tests", "test_filter": "Enqueue.*"},
+        {"name": "Alloc", "description": "Memory allocation tests", "test_filter": "Alloc.*"},
+        {"name": "ParamTests", "description": "Parameter handling tests", "test_filter": "ParamTests.*"},
+        {"name": "ProxyTests", "description": "Proxy service tests", "test_filter": "ProxyTests.*"},
+        {"name": "Rcclwrap", "description": "RCCL wrapper tests", "test_filter": "Rcclwrap.*"},
+        {"name": "TransportTest", "description": "Transport layer tests", "test_filter": "TransportTest.*"},
+        {"name": "ArgCheck", "description": "Argument validation tests", "test_filter": "ArgCheck.*"},
+        {"name": "BitOps", "description": "Bit operation utility tests", "test_filter": "ALIGN_*:DIVUP:ROUNDUP:u32fp*:*Hash"},
+        {"name": "AltRsmi", "description": "Alternative RSMI tests", "test_filter": "AltRsmi.*"},
+        {"name": "NetSocket", "description": "Network socket tests", "test_filter": "NetSocket.*"},
+        {"name": "Ipcsocket", "description": "IPC socket tests", "test_filter": "Ipcsocket.*"},
+        {"name": "Standalone.SplitComms_RankCheck", "description": "Verify device assignment for each rank using ncclCommSplit API", "test_filter": "Standalone.SplitComms_RankCheck"},
+        {"name": "Standalone.SplitComms_OneColor", "description": "Creates communicator for each device with same color", "test_filter": "Standalone.SplitComms_OneColor"},
+        {"name": "Standalone.SplitComms_Reduce", "description": "Reduces communicators into fewer ranks", "test_filter": "Standalone.SplitComms_Reduce"},
+        {"name": "Standalone.RegressionTiming", "description": "Verify no timing regression for protocols (LL, LL128, Simple)", "test_filter": "Standalone.RegressionTiming"},
+        {"name": "Standalone.StackSize", "description": "Verify RCCL kernel stack size for each gfx architecture", "test_filter": "Standalone.StackSize"},
+        {"name": "Standalone.CommCuDevice_Check", "description": "Verify device associated with communicator in single/multi-device scenarios", "test_filter": "Standalone.CommCuDevice_Check"},
+        {"name": "Standalone.SplitComms_RankCheck_Basic_Failure", "description": "Verify ncclCommUserRank fails with invalid communicator handle", "test_filter": "Standalone.SplitComms_RankCheck_Basic_Failure"}
+      ]
+    },
+    "debug_tests": {
+      "is_gtest": true,
+      "binary": "rccl-UnitTests",
+      "num_ranks": 1,
+      "num_nodes": 1,
+      "num_gpus": 8,
+      "timeout": 0,
+      "env_variables": {
+        "NCCL_DEBUG": "VERSION",
+        "NCCL_DEBUG_SUBSYS": "ALL"
+      },
+      "tests": [
+        {
+          "name": "Debug_ThreadName",
+          "description": "Test thread naming functionality with AllToAll operation",
+          "test_filter": "AllToAll.OutOfPlaceGraph*",
+          "env_variables": {
+            "NCCL_SET_THREAD_NAME": "1"
+          }
+        },
+        {
+          "name": "Debug_AllSubsystems",
+          "description": "Test debug logging for all RCCL subsystems with trace-level output",
+          "test_filter": "AllToAll.OutOfPlaceGraph*",
+          "env_variables": {
+            "NCCL_DEBUG": "TRACE",
+            "NCCL_DEBUG_FILE": "cdcvg.dbg",
+            "NCCL_DEBUG_SUBSYS": "INIT,COLL,P2P,SHM,NET,GRAPH,TUNING,ENV,ALLOC,CALL,PROXY,NVLS,BOOTSTRAP,REG,PROFILE,RAS,VERBS"
+          }
+        }
+      ]
+    },
+    "alt_rsmi_tests": {
+      "is_gtest": true,
+      "binary": "rccl-UnitTestsFixtures",
+      "num_ranks": 1,
+      "num_nodes": 1,
+      "num_gpus": 1,
+      "timeout": 120,
+      "tests": [
+        {
+          "name": "AltRsmi_AllTests",
+          "description": "All Alternative RSMI implementation tests using public API only",
+          "test_filter": "AltRsmiTest.*"
+        }
+      ]
+    }
+  },
+  "test_suites": [
+    {
+      "name": "SHM Tests - Complete Suite",
+      "description": "All shared memory transport tests (single node)",
+      "config": "shm_comprehensive",
+      "enabled": true
+    },
+    {
+      "name": "P2P Tests - Complete Suite",
+      "description": "All peer-to-peer transport tests (single node)",
+      "config": "p2p_comprehensive",
+      "enabled": true
+    },
+    {
+      "name": "NET Transport - Ethernet (Multi-Node)",
+      "description": "Network transport tests over Ethernet",
+      "config": "net_transport_eth_multinode",
+      "enabled": true
+    },
+    {
+      "name": "NET IB - Initialization Tests",
+      "description": "InfiniBand plugin initialization and device enumeration",
+      "config": "net_ib_initialization",
+      "enabled": true
+    },
+    {
+      "name": "NET IB - Device Properties",
+      "description": "InfiniBand device property queries",
+      "config": "net_ib_properties",
+      "enabled": true
+    },
+    {
+      "name": "NET IB - Memory Registration",
+      "description": "InfiniBand memory registration tests",
+      "config": "net_ib_memory",
+      "enabled": true
+    },
+    {
+      "name": "NET IB - Data Transfer",
+      "description": "InfiniBand data transfer and stress tests",
+      "config": "net_ib_transfer",
+      "enabled": true
+    },
+    {
+      "name": "Unit Tests - Fixtures",
+      "description": "Non-MPI unit tests using fixtures",
+      "config": "unit_tests_fixtures",
+      "enabled": true
+    },
+    {
+      "name": "Unit Tests - Standard Collectives",
+      "description": "Basic collective operation tests",
+      "config": "unit_tests_standard",
+      "enabled": true
+    },
+    {
+      "name": "Debug and Logging Tests",
+      "description": "Tests for debug output and logging functionality",
+      "config": "debug_tests",
+      "enabled": true
+    },
+    {
+      "name": "AltRsmi Tests - Complete Suite",
+      "description": "All Alternative RSMI tests using public API only",
+      "config": "alt_rsmi_tests",
+      "enabled": true
+    }
+  ]
+}
diff --git a/projects/rccl/tools/scripts/test_runner/configs/rccl_perf_tests.json b/projects/rccl/tools/scripts/test_runner/configs/rccl_perf_tests.json
new file mode 100644
index 0000000000..bdf1810a0c
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/configs/rccl_perf_tests.json
@@ -0,0 +1,458 @@
+{
+  "system_configurations": {
+    "name": "RCCL-Performance-Benchmarks",
+    "description": "RCCL Performance Test Suite - All Collective Operations"
+  },
+  "paths": {
+    "workdir": "${WORKDIR:-/path/to/rccl}",
+    "rocm_path": "${ROCM_PATH:-/opt/rocm}",
+    "mpi_path": "${MPI_PATH:-/opt/ompi}",
+    "test_binary_dir": "${RCCL_TEST_BIN_DIR}"
+  },
+  "env_variables": {
+    "HSA_NO_SCRATCH_RECLAIM": "1",
+    "NCCL_DEBUG": "WARN"
+  },
+  "build_configuration": {
+    "cmake_options": {
+      "CMAKE_BUILD_TYPE": "Release",
+      "ENABLE_CODE_COVERAGE": "OFF",
+      "BUILD_TESTS": "ON",
+      "BUILD_LOCAL_GPU_TARGET_ONLY": "ON",
+      "TRACE": "OFF",
+      "COLLTRACE": "OFF"
+    },
+    "env_variables": {
+      "HIPCC_COMPILE_FLAGS_APPEND": "-O3"
+    },
+    "parallel_jobs": 64,
+    "generator": "Unix Makefiles"
+  },
+  "test_configurations": {
+    "perf_base": {
+      "is_gtest": false,
+      "num_ranks": 8,
+      "num_nodes": 1,
+      "timeout": 300,
+      "env_variables": {
+        "NCCL_LAUNCH_MODE": "GROUP"
+      }
+    },
+    "allreduce_perf": {
+      "extends": "perf_base",
+      "binary": "all_reduce_perf",
+      "tests": [
+        {
+          "name": "AllReduce_Perf_SmallMessages",
+          "description": "AllReduce bandwidth test for small messages (8B - 8KB)",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "AllReduce_Perf_MediumMessages",
+          "description": "AllReduce bandwidth test for medium messages (16KB - 1MB)",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "AllReduce_Perf_LargeMessages",
+          "description": "AllReduce bandwidth test for large messages (2MB - 128MB)",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 600
+        },
+        {
+          "name": "AllReduce_Perf_InPlace",
+          "description": "AllReduce in-place bandwidth test",
+          "command_args": "-b 8 -e 128M -f 2 -g 1 -c 1"
+        },
+        {
+          "name": "AllReduce_Perf_MultiGPU",
+          "description": "AllReduce test with all 8 GPUs",
+          "command_args": "-b 1M -e 128M -f 2 -g 8"
+        }
+      ]
+    },
+    "allgather_perf": {
+      "extends": "perf_base",
+      "binary": "all_gather_perf",
+      "tests": [
+        {
+          "name": "AllGather_Perf_SmallMessages",
+          "description": "AllGather bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "AllGather_Perf_MediumMessages",
+          "description": "AllGather bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "AllGather_Perf_LargeMessages",
+          "description": "AllGather bandwidth test for large messages",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 600
+        }
+      ]
+    },
+    "broadcast_perf": {
+      "extends": "perf_base",
+      "binary": "broadcast_perf",
+      "tests": [
+        {
+          "name": "Broadcast_Perf_SmallMessages",
+          "description": "Broadcast bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "Broadcast_Perf_MediumMessages",
+          "description": "Broadcast bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "Broadcast_Perf_LargeMessages",
+          "description": "Broadcast bandwidth test for large messages",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 600
+        }
+      ]
+    },
+    "reduce_perf": {
+      "extends": "perf_base",
+      "binary": "reduce_perf",
+      "tests": [
+        {
+          "name": "Reduce_Perf_SmallMessages",
+          "description": "Reduce bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "Reduce_Perf_MediumMessages",
+          "description": "Reduce bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "Reduce_Perf_LargeMessages",
+          "description": "Reduce bandwidth test for large messages",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 600
+        }
+      ]
+    },
+    "reducescatter_perf": {
+      "extends": "perf_base",
+      "binary": "reduce_scatter_perf",
+      "tests": [
+        {
+          "name": "ReduceScatter_Perf_SmallMessages",
+          "description": "ReduceScatter bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "ReduceScatter_Perf_MediumMessages",
+          "description": "ReduceScatter bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "ReduceScatter_Perf_LargeMessages",
+          "description": "ReduceScatter bandwidth test for large messages",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 600
+        }
+      ]
+    },
+    "alltoall_perf": {
+      "extends": "perf_base",
+      "binary": "alltoall_perf",
+      "tests": [
+        {
+          "name": "AllToAll_Perf_SmallMessages",
+          "description": "AllToAll bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "AllToAll_Perf_MediumMessages",
+          "description": "AllToAll bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "AllToAll_Perf_LargeMessages",
+          "description": "AllToAll bandwidth test for large messages",
+          "command_args": "-b 2M -e 64M -f 2 -g 1",
+          "timeout": 600
+        }
+      ]
+    },
+    "sendrecv_perf": {
+      "extends": "perf_base",
+      "binary": "sendrecv_perf",
+      "num_ranks": 2,
+      "tests": [
+        {
+          "name": "SendRecv_Perf_SmallMessages",
+          "description": "SendRecv bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "SendRecv_Perf_MediumMessages",
+          "description": "SendRecv bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "SendRecv_Perf_LargeMessages",
+          "description": "SendRecv bandwidth test for large messages",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 600
+        },
+        {
+          "name": "SendRecv_Perf_Latency",
+          "description": "SendRecv latency test",
+          "command_args": "-b 8 -e 8 -f 1 -g 1 -n 1000"
+        }
+      ]
+    },
+    "allreduce_multinode": {
+      "extends": "perf_base",
+      "binary": "all_reduce_perf",
+      "num_ranks": 16,
+      "num_nodes": 2,
+      "timeout": 600,
+      "env_variables": {
+        "NCCL_IB_DISABLE": "0",
+        "NCCL_NET_GDR_LEVEL": "5",
+        "NCCL_SOCKET_IFNAME": "eth0,eth1"
+      },
+      "tests": [
+        {
+          "name": "AllReduce_MultiNode_SmallMessages",
+          "description": "Multi-node AllReduce test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "AllReduce_MultiNode_MediumMessages",
+          "description": "Multi-node AllReduce test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "AllReduce_MultiNode_LargeMessages",
+          "description": "Multi-node AllReduce test for large messages",
+          "command_args": "-b 2M -e 128M -f 2 -g 1",
+          "timeout": 900
+        },
+        {
+          "name": "AllReduce_MultiNode_MaxBandwidth",
+          "description": "Multi-node AllReduce maximum bandwidth test",
+          "command_args": "-b 128M -e 2G -f 2 -g 8",
+          "timeout": 1200
+        }
+      ]
+    },
+    "scatter_gather_perf": {
+      "extends": "perf_base",
+      "binary": "scatter_perf",
+      "tests": [
+        {
+          "name": "Scatter_Perf_SmallMessages",
+          "description": "Scatter bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "Scatter_Perf_MediumMessages",
+          "description": "Scatter bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "Scatter_Perf_LargeMessages",
+          "description": "Scatter bandwidth test for large messages",
+          "command_args": "-b 2M -e 64M -f 2 -g 1"
+        }
+      ]
+    },
+    "gather_perf": {
+      "extends": "perf_base",
+      "binary": "gather_perf",
+      "tests": [
+        {
+          "name": "Gather_Perf_SmallMessages",
+          "description": "Gather bandwidth test for small messages",
+          "command_args": "-b 8 -e 8K -f 2 -g 1"
+        },
+        {
+          "name": "Gather_Perf_MediumMessages",
+          "description": "Gather bandwidth test for medium messages",
+          "command_args": "-b 16K -e 1M -f 2 -g 1"
+        },
+        {
+          "name": "Gather_Perf_LargeMessages",
+          "description": "Gather bandwidth test for large messages",
+          "command_args": "-b 2M -e 64M -f 2 -g 1"
+        }
+      ]
+    },
+    "allreduce_algos": {
+      "extends": "perf_base",
+      "binary": "all_reduce_perf",
+      "num_ranks": 8,
+      "tests": [
+        {
+          "name": "AllReduce_Ring_Algorithm",
+          "description": "AllReduce using Ring algorithm",
+          "command_args": "-b 1M -e 128M -f 2 -g 1",
+          "env_variables": {
+            "NCCL_ALGO": "Ring"
+          }
+        },
+        {
+          "name": "AllReduce_Tree_Algorithm",
+          "description": "AllReduce using Tree algorithm",
+          "command_args": "-b 1M -e 128M -f 2 -g 1",
+          "env_variables": {
+            "NCCL_ALGO": "Tree"
+          }
+        },
+        {
+          "name": "AllReduce_CollNetDirect",
+          "description": "AllReduce using CollNet Direct",
+          "command_args": "-b 1M -e 128M -f 2 -g 1",
+          "env_variables": {
+            "NCCL_ALGO": "CollNetDirect"
+          }
+        }
+      ]
+    },
+    "allreduce_protocols": {
+      "extends": "perf_base",
+      "binary": "all_reduce_perf",
+      "num_ranks": 8,
+      "tests": [
+        {
+          "name": "AllReduce_SimpleProtocol",
+          "description": "AllReduce using Simple protocol",
+          "command_args": "-b 1M -e 128M -f 2 -g 1",
+          "env_variables": {
+            "NCCL_PROTO": "Simple"
+          }
+        },
+        {
+          "name": "AllReduce_LL_Protocol",
+          "description": "AllReduce using LL (Low Latency) protocol",
+          "command_args": "-b 1M -e 128M -f 2 -g 1",
+          "env_variables": {
+            "NCCL_PROTO": "LL"
+          }
+        },
+        {
+          "name": "AllReduce_LL128_Protocol",
+          "description": "AllReduce using LL128 protocol",
+          "command_args": "-b 1M -e 128M -f 2 -g 1",
+          "env_variables": {
+            "NCCL_PROTO": "LL128"
+          }
+        }
+      ]
+    },
+    "stress_tests": {
+      "extends": "perf_base",
+      "binary": "all_reduce_perf",
+      "num_ranks": 8,
+      "timeout": 1800,
+      "tests": [
+        {
+          "name": "AllReduce_Stress_LongDuration",
+          "description": "Long duration AllReduce stress test",
+          "command_args": "-b 1M -e 128M -f 2 -g 8 -n 10000"
+        },
+        {
+          "name": "AllReduce_Stress_MaxSize",
+          "description": "Maximum message size stress test",
+          "command_args": "-b 1G -e 2G -f 2 -g 8",
+          "timeout": 2400
+        },
+        {
+          "name": "AllReduce_Stress_AllSizes",
+          "description": "All message sizes comprehensive test",
+          "command_args": "-b 8 -e 2G -f 2 -g 8 -n 100",
+          "timeout": 3600
+        }
+      ]
+    }
+  },
+  "test_suites": [
+    {
+      "name": "AllReduce Performance Tests",
+      "description": "AllReduce collective bandwidth and latency benchmarks",
+      "config": "allreduce_perf",
+      "enabled": true
+    },
+    {
+      "name": "AllGather Performance Tests",
+      "description": "AllGather collective bandwidth benchmarks",
+      "config": "allgather_perf",
+      "enabled": true
+    },
+    {
+      "name": "Broadcast Performance Tests",
+      "description": "Broadcast collective bandwidth benchmarks",
+      "config": "broadcast_perf",
+      "enabled": true
+    },
+    {
+      "name": "Reduce Performance Tests",
+      "description": "Reduce collective bandwidth benchmarks",
+      "config": "reduce_perf",
+      "enabled": true
+    },
+    {
+      "name": "ReduceScatter Performance Tests",
+      "description": "ReduceScatter collective bandwidth benchmarks",
+      "config": "reducescatter_perf",
+      "enabled": true
+    },
+    {
+      "name": "AllToAll Performance Tests",
+      "description": "AllToAll collective bandwidth benchmarks",
+      "config": "alltoall_perf",
+      "enabled": true
+    },
+    {
+      "name": "SendRecv Performance Tests",
+      "description": "Point-to-point SendRecv bandwidth and latency benchmarks",
+      "config": "sendrecv_perf",
+      "enabled": true
+    },
+    {
+      "name": "Scatter Performance Tests",
+      "description": "Scatter collective bandwidth benchmarks",
+      "config": "scatter_gather_perf",
+      "enabled": false
+    },
+    {
+      "name": "Gather Performance Tests",
+      "description": "Gather collective bandwidth benchmarks",
+      "config": "gather_perf",
+      "enabled": false
+    },
+    {
+      "name": "AllReduce Multi-Node Tests",
+      "description": "Multi-node AllReduce performance tests (requires 2+ nodes)",
+      "config": "allreduce_multinode",
+      "enabled": false
+    },
+    {
+      "name": "AllReduce Algorithm Comparison",
+      "description": "Compare different AllReduce algorithms (Ring, Tree, CollNet)",
+      "config": "allreduce_algos",
+      "enabled": false
+    },
+    {
+      "name": "AllReduce Protocol Comparison",
+      "description": "Compare different protocols (Simple, LL, LL128)",
+      "config": "allreduce_protocols",
+      "enabled": false
+    },
+    {
+      "name": "Stress Tests",
+      "description": "Long duration and maximum size stress tests",
+      "config": "stress_tests",
+      "enabled": false
+    }
+  ]
+}
+
diff --git a/projects/rccl/tools/scripts/test_runner/configs/test_config_sample.json b/projects/rccl/tools/scripts/test_runner/configs/test_config_sample.json
new file mode 100644
index 0000000000..d67191084b
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/configs/test_config_sample.json
@@ -0,0 +1,126 @@
+{
+  "system_configurations": {
+    "name": "rccl-test-system",
+    "description": "Optional description of the system"
+  },
+
+  "paths": {
+    "workdir": "${WORKDIR:-/path/to/rccl}",
+    "rocm_path": "${ROCM_PATH:-/opt/rocm}",
+    "mpi_path": "${MPI_PATH:-/opt/ompi}",
+    "test_binary_dir": "${RCCL_TEST_BIN_DIR:-build/test}"
+  },
+
+  "env_variables": {
+    "HSA_NO_SCRATCH_RECLAIM": "1",
+    "NCCL_DEBUG": "WARN"
+  },
+
+  "build_configuration": {
+    "cmake_options": {
+      "CMAKE_BUILD_TYPE": "Release",
+      "BUILD_TESTS": "ON"
+    },
+    "env_variables": {
+      "HIPCC_COMPILE_FLAGS_APPEND": "-O2"
+    },
+    "parallel_jobs": 64,
+    "generator": "Unix Makefiles"
+  },
+
+  "test_configurations": {
+    "base_config": {
+      "env_variables": {
+        "NCCL_LAUNCH_MODE": "GROUP"
+      },
+      "args": ["--verbose"],
+      "mpi_args": ["--bind-to none"]
+    },
+
+    "gtest_config": {
+      "extends": "base_config",
+      "is_gtest": true,
+      "binary": "rccl-UnitTests",
+      "num_ranks": 1,
+      "num_nodes": 1,
+      "num_gpus": 8,
+      "timeout": 120,
+      "env_variables": {
+        "NCCL_DEBUG": "INFO"
+      },
+      "tests": [
+        {
+          "name": "AllReduceTest",
+          "description": "Test AllReduce with specific parameters",
+          "is_gtest": true,
+          "binary": "rccl-UnitTests",
+          "test_filter": "AllReduce.InPlace",
+          "command_args": "--gtest_also_run_disabled_tests",
+          "num_ranks": 1,
+          "num_nodes": 1,
+          "num_gpus": 4,
+          "timeout": 60,
+          "env_variables": {
+            "NCCL_DEBUG": "TRACE"
+          }
+        },
+        {
+          "name": "BroadcastTest",
+          "test_filter": "Broadcast.*"
+        }
+      ]
+    },
+
+    "mpi_config": {
+      "extends": "base_config",
+      "binary": "rccl-UnitTestsMPI",
+      "num_ranks": 2,
+      "num_nodes": 1,
+      "timeout": 180,
+      "tests": [
+        {"name": "P2pTest", "test_filter": "P2pMPITest.*"},
+        {"name": "ShmTest", "test_filter": "ShmMPITest.*"}
+      ]
+    },
+
+    "perf_config": {
+      "is_gtest": false,
+      "binary": "all_reduce_perf",
+      "num_ranks": 8,
+      "num_nodes": 2,
+      "num_gpus": 4,
+      "timeout": 300,
+      "tests": [
+        {
+          "name": "AllReducePerf",
+          "command_args": "-b 8 -e 128M -f 2 -g 1"
+        }
+      ]
+    }
+  },
+
+  "test_suites": [
+    {
+      "name": "unit_tests",
+      "description": "Unit tests with GTest",
+      "config": "gtest_config",
+      "enabled": true,
+      "num_ranks": 1,
+      "num_nodes": 1,
+      "num_gpus": 8,
+      "timeout": 200,
+      "env_variables": {
+        "NCCL_DEBUG_SUBSYS": "INIT"
+      }
+    },
+    {
+      "name": "mpi_tests",
+      "config": "mpi_config"
+    },
+    {
+      "name": "perf_tests",
+      "config": "perf_config",
+      "enabled": false
+    }
+  ]
+}
diff --git a/projects/rccl/tools/scripts/test_runner/lib/__init__.py b/projects/rccl/tools/scripts/test_runner/lib/__init__.py
new file mode 100644
index 0000000000..61372b54ff
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/lib/__init__.py
@@ -0,0 +1,20 @@
+"""
+RCCL Test Runner Library
+Provides modules for test configuration, parsing, and execution
+"""
+
+from .test_config import TestConfigProcessor
+from .test_parser import ArgumentParserInterface, parse_test_output
+from .test_executor import TestExecutor, ExitCode, TestResult
+
+__all__ = [
+    'TestConfigProcessor',
+    'ArgumentParserInterface',
+    'parse_test_output',
+    'TestExecutor',
+    'ExitCode',
+    'TestResult'
+]
+
+__version__ = '1.0.0'
+
diff --git a/projects/rccl/tools/scripts/test_runner/lib/test_config.py b/projects/rccl/tools/scripts/test_runner/lib/test_config.py
new file mode 100644
index 0000000000..c9b1562a3f
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/lib/test_config.py
@@ -0,0 +1,401 @@
+#!/usr/bin/env python3
+# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
+#
+# See LICENSE.txt for license information
+"""
+Test Configuration Processor Module
+Handles hierarchical test configuration with inheritance and merging
+"""
+
+import json
+import os
+import re
+from copy import deepcopy
+from pathlib import Path
+from types import MappingProxyType
+
+# Set default WORKDIR to rccl root directory if not already defined
+# This file is at: rccl/tools/scripts/test_runner/lib/test_config.py
+# rccl root is 5 directories up
+if "WORKDIR" not in os.environ:
+    _rccl_root = Path(__file__).resolve().parents[4]
+    os.environ["WORKDIR"] = str(_rccl_root)
+
+
+class TestConfigProcessor:
+    """
+    Processes hierarchical test configurations with support for:
+    - Configuration inheritance ('extends' directive)
+    - Environment variable merging
+    - Test parameter inheritance
+    - Environment variable expansion in paths
+    """
+
+    def __init__(self, config_file):
+        """
+        Initialize the TestConfigProcessor with the configuration file.
+
+        Args:
+            config_file: Path to JSON configuration file
+        """
+        if not os.path.exists(config_file):
+            raise FileNotFoundError(f"Configuration file not found: {config_file}")
+
+        # Load the JSON configuration file
+        with open(config_file, 'r') as file:
+            config_data = json.load(file)
+
+        # Expand environment variables in paths section
+        if "paths" in config_data:
+            config_data["paths"] = self._expand_env_vars_in_dict(config_data["paths"])
+
+        # Make the configuration immutable (frozen)
+        self.config = MappingProxyType(config_data)
+        self.config_file = config_file
+
+    def _expand_env_var(self, value):
+        """
+        Expand environment variables in a string.
+
+        Supports ${VAR}, ${VAR:-default}, and $VAR syntax. An unset variable
+        is left unexpanded for ${VAR} and $VAR; for ${VAR:-default} it is
+        replaced by the default (a set-but-empty variable keeps its empty value).
+
+        Args:
+            value: String that may contain environment variables
+
+        Returns:
+            str: String with environment variables expanded
+
+        Examples:
+            "${HOME}/code" -> "/home/user/code"
+            "$ROCM_PATH/bin" -> "/opt/rocm/bin"
+            "${UNDEFINED:-/default}" -> "/default" (bash-style default)
+            "${WORKDIR:-$HOME/code}" -> expands $HOME in default if WORKDIR not set
+        """
+        if not isinstance(value, str):
+            return value
+
+        # Pattern to match ${VAR}, ${VAR:-default}, or $VAR
+        # First, handle ${VAR:-default} pattern
+        def replace_with_default(match):
+            var_name = match.group(1)
+            default_value = match.group(2)
+            # Get the env var, or use default
+            result = os.environ.get(var_name)
+            if result is None:
+                # Recursively expand env vars in the default value
+                result = self._expand_env_var(default_value)
+            return result
+
+        # Replace ${VAR:-default} patterns
+        value = re.sub(r'\$\{([A-Za-z_][A-Za-z0-9_]*):-([^}]*)\}', replace_with_default, value)
+
+        # Replace ${VAR} patterns
+        value = re.sub(r'\$\{([A-Za-z_][A-Za-z0-9_]*)\}',
+                      lambda m: os.environ.get(m.group(1), m.group(0)), value)
+
+        # Replace $VAR patterns (but not ${ to avoid double replacement)
+        value = re.sub(r'\$([A-Za-z_][A-Za-z0-9_]*)',
+                      lambda m: os.environ.get(m.group(1), m.group(0)), value)
+
+        return value
+
+    def _expand_env_vars_in_dict(self, data):
+        """
+        Recursively expand environment variables in all string values in a dictionary.
+
+        Args:
+            data: Dictionary that may contain environment variables in string values
+
+        Returns:
+            dict: Dictionary with all environment variables expanded
+        """
+        if isinstance(data, dict):
+            return {key: self._expand_env_vars_in_dict(value) for key, value in data.items()}
+        elif isinstance(data, list):
+            return [self._expand_env_vars_in_dict(item) for item in data]
+        elif isinstance(data, str):
+            return self._expand_env_var(data)
+        else:
+            return data
+
+    def combine_configs(self, config_name):
+        """
+        Combines configurations generically using the 'extends' directive.
+
+        Merging rules:
+        - env_variables: Overwrite duplicate keys (child overwrites parent)
+        - mpi_args: Append and remove duplicates
+        - args: Append and remove duplicates
+        - tests: Merge by test name
+        - Other fields: Child overwrites parent
+
+        Args:
+            config_name: Name of configuration to combine
+
+        Returns:
+            dict: Combined configuration
+        """
+        test_configs = self.config.get("test_configurations", {})
+        if config_name not in test_configs:
+            raise ValueError(
+                f"Configuration '{config_name}' not found in test_configurations. "
+                f"Available: {', '.join(test_configs.keys())}"
+            )
+
+        # Start with a deep copy of the target configuration
+        combined_config = deepcopy(test_configs[config_name])
+
+        # Process the 'extends' directive if it exists
+        while "extends" in combined_config:
+            parent_configs = combined_config.pop("extends")
+            if not isinstance(parent_configs, list):
+                parent_configs = [parent_configs]
+
+            for parent_config_name in parent_configs:
+                if parent_config_name not in test_configs:
+                    raise ValueError(
+                        f"Parent configuration '{parent_config_name}' not found."
+                    )
+
+                parent_config = deepcopy(test_configs[parent_config_name])
+
+                # Recursively process parent's 'extends' directive
+                if "extends" in parent_config:
+                    parent_config = self.combine_configs(parent_config_name)
+
+                # Merge all keys from parent into combined configuration
+                for key, value in parent_config.items():
+                    if key == "env_variables":
+                        # Merge env_variables (child overwrites parent)
+                        current_env = combined_config.get("env_variables", {})
+                        combined_env = {**value, **current_env}
+                        combined_config["env_variables"] = combined_env
+                    elif key in ["args", "mpi_args"]:
+                        # Append lists and remove duplicates (preserve order)
+                        current_items = combined_config.get(key, [])
+                        if isinstance(current_items, list) and isinstance(value, list):
+                            combined_config[key] = list(dict.fromkeys(value + current_items))
+                        elif isinstance(value, list):
+                            combined_config[key] = value
+                    elif key == "tests":
+                        # Merge tests by name
+                        current_tests = combined_config.get("tests", [])
+                        combined_tests = self._merge_tests(value, current_tests)
+                        combined_config["tests"] = combined_tests
+                    else:
+                        # Child overwrites parent for other keys
+                        if key not in combined_config:
+                            combined_config[key] = value
+
+        return combined_config
+
+    def _merge_tests(self, parent_tests, child_tests):
+        """
+        Merges two lists of tests by name.
+
+        Args:
+            parent_tests: List of parent tests
+            child_tests: List of child tests
+
+        Returns:
+            list: Merged list of tests
+        """
+        merged_tests = []
+        test_map = {}
+
+        # Process parent tests
+        for test in parent_tests:
+            if isinstance(test, str):
+                test_map[test] = {"name": test}
+            elif isinstance(test, dict):
+                name = test.get("name")
+                if name:
+                    test_map[name] = test
+
+        # Process child tests (child overwrites parent)
+        for test in child_tests:
+            if isinstance(test, str):
+                test_map[test] = {"name": test}
+            elif isinstance(test, dict):
+                name = test.get("name")
+                if name:
+                    # Merge with parent test if exists
+                    if name in test_map:
+                        parent_test = test_map[name]
+                        merged_test = {**parent_test, **test}
+                        test_map[name] = merged_test
+                    else:
+                        test_map[name] = test
+
+        # Convert map back to list
+        merged_tests = list(test_map.values())
+        return merged_tests
+
+    def _apply_test_defaults(self, tests, config_defaults):
+        """
+        Apply configuration-level defaults to individual tests.
+
+        Test-specific values override configuration defaults.
+
+        Args:
+            tests: List of test dictionaries
+            config_defaults: Dictionary with default values from configuration
+
+        Returns:
+            list: Tests with defaults applied
+        """
+        # Fields that can have defaults at config level
+        default_fields = ["is_gtest", "binary", "num_ranks", "num_nodes", "num_gpus", "timeout"]
+
+        processed_tests = []
+        for test in tests:
+            # Start with config defaults
+            merged_test = {}
+
+            # Copy in each defined default; test values override them below
+            for field in default_fields:
+                if field in config_defaults:
+                    merged_test[field] = config_defaults[field]
+
+            # Override with test-specific values
+            merged_test.update(test)
+
+            processed_tests.append(merged_test)
+
+        return processed_tests
+
+    def parse_test_suites(self):
+        """
+        Parses the test_suites section and processes each test suite.
+
+        Applies hierarchical defaults in order (test-specific overrides suite, suite overrides config):
+        1. Configuration-level defaults
+        2. Test suite-level defaults (override config)
+        3. Individual test values (override both)
+
+        Returns:
+            list: List of combined configurations for each test suite
+        """
+        test_suites = self.config.get("test_suites", [])
+        combined_suites = []
+
+        for suite in test_suites:
+            config_name = suite.get("config")
+            if not config_name:
+                raise ValueError(
+                    f"Test suite '{suite.get('name')}' does not specify a configuration."
+                )
+
+            # Combine the configuration for the test suite
+            combined_config = self.combine_configs(config_name)
+
+            # Extract configuration-level defaults
+            config_defaults = {
+                "is_gtest": combined_config.get("is_gtest"),
+                "binary": combined_config.get("binary"),
+                "num_ranks": combined_config.get("num_ranks"),
+                "num_nodes": combined_config.get("num_nodes"),
+                "num_gpus": combined_config.get("num_gpus", 8),
+                "timeout": combined_config.get("timeout")
+            }
+            # Remove None values
+            config_defaults = {k: v for k, v in config_defaults.items() if v is not None}
+
+            # Extract suite-level defaults (override config-level)
+            suite_defaults = {
+                "is_gtest": suite.get("is_gtest"),
+                "binary": suite.get("binary"),
+                "num_ranks": suite.get("num_ranks"),
+                "num_nodes": suite.get("num_nodes"),
+                "num_gpus": suite.get("num_gpus"),
+                "timeout": suite.get("timeout")
+            }
+            # Remove None values
+            suite_defaults = {k: v for k, v in suite_defaults.items() if v is not None}
+
+            # Merge defaults: suite-level overrides config-level
+            merged_defaults = {**config_defaults, **suite_defaults}
+
+            # Apply merged defaults to tests
+            tests = combined_config.get("tests", [])
+            if tests and merged_defaults:
+                combined_config["tests"] = self._apply_test_defaults(tests, merged_defaults)
+
+            # Add suite-specific details
+            combined_config["suite_details"] = {
+                "name": suite.get("name"),
+                "description": suite.get("description", ""),
+                "num_nodes": suite.get("num_nodes", 1),
+                "num_ranks": suite.get("num_ranks", 1),
+                "num_gpus": suite.get("num_gpus", 8),
+                "enabled": suite.get("enabled", True)
+            }
+
+            combined_suites.append(combined_config)
+
+        return combined_suites
+
+    def get_system_config(self):
+        """
+        Get system-wide configuration settings.
+
+        Returns:
+            dict: System configuration
+        """
+        return self.config.get("system_configurations", {})
+
+    def get_env_variables(self):
+        """
+        Get global environment variables.
+
+        Returns:
+            dict: Global environment variables
+        """
+        return self.config.get("env_variables", {})
+
+    def get_paths(self):
+        """
+        Get system paths (ROCM, MPI, etc.).
+
+        Returns:
+            dict: System paths
+        """
+        return self.config.get("paths", {})
+
+    def get_build_config(self):
+        """
+        Get build configuration settings.
+
+        Returns:
+            dict: Build configuration with CMake options, environment variables, etc.
+        """
+        return self.config.get("build_configuration", {})
+
+    def validate_config(self):
+        """
+        Validate the configuration for required fields.
+
+        Raises:
+            ValueError: If configuration is invalid
+        """
+        # Check for required top-level keys
+        required_keys = ["test_configurations", "test_suites"]
+        for key in required_keys:
+            if key not in self.config:
+                raise ValueError(f"Missing required configuration key: {key}")
+
+        # Validate test suites
+        test_suites = self.config.get("test_suites", [])
+        if not test_suites:
+            raise ValueError("No test suites defined in configuration")
+
+        for suite in test_suites:
+            if "name" not in suite:
+                raise ValueError("Test suite missing 'name' field")
+            if "config" not in suite:
+                raise ValueError(f"Test suite '{suite['name']}' missing 'config' field")
+
+        return True
+
diff --git a/projects/rccl/tools/scripts/test_runner/lib/test_executor.py b/projects/rccl/tools/scripts/test_runner/lib/test_executor.py
new file mode 100644
index 0000000000..22b51079c4
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/lib/test_executor.py
@@ -0,0 +1,858 @@
+#!/usr/bin/env python3
+# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
+#
+# See LICENSE.txt for license information
+"""
+Test Executor Module
+Handles test execution, build processes, and result tracking
+"""
+
+import os
+import subprocess
+import sys
+import time
+import datetime
+from enum import IntEnum, Enum
+from pathlib import Path
+
+# Make stdout line-buffered to prevent output ordering issues with subprocesses
+sys.stdout.reconfigure(line_buffering=True)
+
+
+class ExitCode(IntEnum):
+    """Integer process exit codes compared against subprocess return codes"""
+    EXIT_SUCCESS = 0
+    EXIT_FAILURE = 1  # not referenced elsewhere in this module
+    EXIT_TIMEOUT = 124  # NOTE(review): presumably the timeout(1) convention - confirm
+
+
+class TestResult(str, Enum):
+    """Test outcome labels; members subclass str, callers in this module use .value"""
+    RESULT_PASSED = "PASSED"
+    RESULT_FAILED = "FAILED"
+    RESULT_TIMEOUT = "TIMEOUT"
+    RESULT_SKIPPED = "SKIPPED"  # not referenced elsewhere in this module
+
+
+class TestExecutor:
+    """
+    Executes tests and manages build/test workflows
+    """
+
+    def __init__(self, config_processor, args):
+        """
+        Build an executor bound to a loaded configuration and CLI options
+
+        Args:
+            config_processor: TestConfigProcessor instance
+            args: Parsed command-line arguments
+        """
+        self.config_processor = config_processor
+        self.args = args
+
+        # Per-test bookkeeping; run_test_suite appends to all four in lockstep
+        self.test_names, self.test_suites = [], []
+        self.test_results, self.test_durations = [], []
+
+        # Cache the configuration sections this class reads later
+        self.system_config = config_processor.get_system_config()
+        self.paths = config_processor.get_paths()
+        self.global_env = config_processor.get_env_variables()
+        self.build_config = config_processor.get_build_config()
+
+        # Reads self.args and self.paths, so it has to follow the caching above
+        self.setup_directories()
+
+        # Resolve the MPI hostfile a single time; run_test reuses the result
+        self.mpi_hostfile = self._detect_mpi_hostfile()
+
+    def setup_directories(self):
+        """
+        Compute and create the workspace, build, log and report directories
+
+        Sets workspace_dir, build_dir, using_custom_lib, log_dir and report_dir.
+        Directory names carry a timestamp unless args.overwrite is set.
+        RCCL_LIB_PATH (or, failing that, RCCL_BUILD_DIR) replaces the default
+        build directory; such a directory is never created here.
+        """
+        verbose = self.args.verbose
+        workdir = self.paths.get("workdir", os.getcwd())
+
+        # With args.overwrite the names are fixed; otherwise they are timestamped
+        if self.args.overwrite:
+            stamp = ""
+        else:
+            stamp = "_" + datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
+        label = f"_{self.args.report_suffix}" if self.args.report_suffix else ""
+        self.workspace_dir = os.path.join(workdir, f"rccl_test_artifacts{label}{stamp}")
+
+        # Logs and reports always live inside the workspace
+        self.log_dir = os.path.join(self.workspace_dir, "logs")
+        self.report_dir = os.path.join(self.workspace_dir, "report")
+
+        # An externally supplied library location wins over the default build dir
+        override = os.environ.get('RCCL_LIB_PATH') or os.environ.get('RCCL_BUILD_DIR')
+        self.using_custom_lib = bool(override)
+        if self.using_custom_lib:
+            self.build_dir = os.path.expanduser(os.path.expandvars(override))
+            if verbose:
+                print(f"Using custom RCCL library path from environment: {self.build_dir}")
+        else:
+            # Default build tree sits directly under workdir, beside the workspace
+            build_name = f"build_debug_cov_on_tests_on{stamp}"
+            self.build_dir = os.path.join(workdir, build_name)
+            os.makedirs(self.build_dir, exist_ok=True)
+
+        # Only directories owned by the runner are created
+        for directory in (self.log_dir, self.report_dir):
+            os.makedirs(directory, exist_ok=True)
+
+        if not verbose:
+            return
+
+        # Diagnostic summary of the chosen locations
+        print(f"Work directory:   {workdir}")
+        print(f"Workspace directory: {self.workspace_dir}")
+        print(f"Build directory:  {self.build_dir}")
+        if self.using_custom_lib:
+            print(f"  (Using custom library from RCCL_LIB_PATH/RCCL_BUILD_DIR)")
+        print(f"Log directory:    {self.log_dir}")
+        print(f"Report directory: {self.report_dir}")
+
+    def _detect_mpi_hostfile(self):
+        """
+        Resolve the MPI hostfile: RCCL_TEST_MPI_HOSTFILE (with ~ expanded) first,
+        then ~/.mpi_hostfile; warns if the env var is set but names no file.
+
+        Returns:
+            str: Path to hostfile, or None if not found
+        """
+        requested = os.environ.get('RCCL_TEST_MPI_HOSTFILE')
+        if requested:
+            hostfile = os.path.expanduser(requested)
+            if os.path.isfile(hostfile):
+                print(f"Using MPI hostfile from RCCL_TEST_MPI_HOSTFILE: {hostfile}")
+                return hostfile
+            # An explicit but unusable setting must not be ignored silently
+            print(f"WARNING: RCCL_TEST_MPI_HOSTFILE is not a file: {requested}")
+
+        default_hostfile = os.path.expanduser('~/.mpi_hostfile')
+        if os.path.isfile(default_hostfile):
+            print(f"Using default MPI hostfile: {default_hostfile}")
+            return default_hostfile
+        return None
+
+    def check_environment(self):
+        """
+        Verify that the tools and libraries the run depends on are present
+
+        Only a missing ROCm directory or RCCL library is fatal; MPI problems just warn.
+
+        Returns:
+            bool: True if environment is valid
+        """
+        verbose = self.args.verbose
+        problems = []
+
+        rocm_root = self.paths.get("rocm_path", "/opt/rocm")
+        if not os.path.isdir(rocm_root):
+            problems.append(f"ROCm not found at {rocm_root}")
+
+        # MPI is optional: only inspected when a path is configured
+        mpi_root = self.paths.get("mpi_path")
+        if mpi_root and not os.path.isdir(mpi_root):
+            print(f"WARNING: MPI path not found: {mpi_root}")
+        elif mpi_root and not os.path.isfile(os.path.join(mpi_root, "bin", "mpirun")):
+            print(f"WARNING: mpirun not found in {mpi_root}/bin/")
+
+        if self.args.no_build or self.using_custom_lib:  # library must pre-exist
+            library = os.path.join(self.build_dir, "librccl.so")
+            if os.path.isfile(library):
+                if verbose:
+                    print(f"Found RCCL library: {library}")
+            else:
+                problems.append(f"RCCL library not found: {library}")
+
+        if problems:
+            print("ERROR: Environment check failed:")
+            print("\n".join(f"  - {item}" for item in problems))
+            return False
+
+        if verbose:
+            print("Environment validation passed")
+        return True
+
+    def build_rccl(self):
+        """
+        Configure and build RCCL with CMake using configurable build settings
+        (skipped when a custom library is in use or args.no_build is set)
+
+        Returns:
+            bool: True if build succeeded or was skipped, False otherwise
+        """
+        # Skip build if using custom library from environment variable
+        if self.using_custom_lib:
+            if self.args.verbose:
+                print("SKIP: Build step skipped (using custom RCCL library from environment)")
+            return True
+
+        if self.args.no_build:
+            if self.args.verbose:
+                print("SKIP: Build step skipped (--no-build)")
+            return True
+
+        print("="*80)
+        print("BUILDING RCCL")
+        print("="*80)
+
+        workdir = self.paths.get("workdir", os.getcwd())
+        rocm_path = self.paths.get("rocm_path", "/opt/rocm")
+        mpi_path = self.paths.get("mpi_path", "")
+
+        # Get build configuration (with defaults)
+        cmake_options = self.build_config.get("cmake_options", {})
+        build_env_vars = self.build_config.get("env_variables", {})
+        parallel_jobs = self.build_config.get("parallel_jobs", 64)
+        generator = self.build_config.get("generator", "Unix Makefiles")
+
+        if self.args.verbose:
+            print(f"Work directory:  {workdir}")
+            print(f"ROCm path:       {rocm_path}")
+            print(f"MPI path:        {mpi_path}")
+            print(f"Build directory: {self.build_dir}")
+            print(f"Parallel jobs:   {parallel_jobs}")
+            print(f"Generator:       {generator}")
+
+        # Setup environment for build
+        env = os.environ.copy()
+
+        # Apply default environment variables for code coverage
+        default_env = {
+            'HIPCC_COMPILE_FLAGS_APPEND': (
+                "-g -Wno-format-nonliteral -Xarch_host -fprofile-instr-generate "
+                "-Xarch_host -fcoverage-mapping -parallel-jobs=16"
+            ),
+            'HIPCC_LINK_FLAGS_APPEND': (
+                "-fprofile-instr-generate -fcoverage-mapping -parallel-jobs=16"
+            ),
+            'LLVM_PROFILE_FILE': "rccl_tests_%p_%m.profraw",
+            'CXX': f"{rocm_path}/bin/amdclang++"
+        }
+
+        # Layering: inherited environment < coverage defaults < config overrides.
+        # Config values are stringified; subprocess needs string values.
+        env.update(default_env)
+        env.update({key: str(value) for key, value in build_env_vars.items()})
+
+        # Build CMake configuration command with defaults
+        default_cmake_options = {
+            "CMAKE_CXX_FLAGS": "-Wl,--build-id=sha1",
+            "CMAKE_EXE_LINKER_FLAGS": "-Wl,--build-id=sha1",
+            "CMAKE_BUILD_TYPE": "Debug",
+            "ENABLE_CODE_COVERAGE": "ON",
+            "BUILD_TESTS": "ON",
+            "BUILD_LOCAL_GPU_TARGET_ONLY": "ON",
+            "TRACE": "ON",
+            "COLLTRACE": "ON",
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_VERBOSE_MAKEFILE": "1",
+            "ENABLE_MPI_TESTS": "ON",
+            "MPI_PATH": mpi_path
+        }
+
+        # Merge with user-provided CMake options (user values override defaults)
+        merged_cmake_options = {**default_cmake_options, **cmake_options}
+
+        # Build CMake command
+        cmake_cmd = [
+            "cmake",
+            "-S", workdir,
+            "-B", self.build_dir
+        ]
+
+        # Add CMake options as -D flags
+        cmake_cmd.extend(f"-D{key}={value}" for key, value in merged_cmake_options.items())
+
+        # Add generator
+        cmake_cmd.append(f"-G{generator}")
+
+        try:
+            print("Running CMake configuration...")
+            if self.args.verbose:
+                print(f"CMake command: {' '.join(cmake_cmd)}")
+                print("Build environment variables:")
+                for key, value in build_env_vars.items():
+                    print(f"  {key}={value}")
+
+            result = subprocess.run(
+                cmake_cmd,
+                cwd=workdir,
+                env=env,
+                capture_output=False
+            )
+
+            if result.returncode != 0:
+                print("ERROR: CMake configuration failed")
+                return False
+
+            print("\nRunning CMake build...")
+            # Argument list instead of a shell string, so a build directory
+            # containing spaces or shell metacharacters is passed intact
+            build_cmd = ["cmake", "--build", self.build_dir, "--parallel", str(parallel_jobs)]
+            if self.args.verbose:
+                print(f"Build command: {' '.join(build_cmd)}")
+
+            result = subprocess.run(
+                build_cmd,
+                cwd=workdir,
+                env=env,
+                capture_output=False
+            )
+
+            if result.returncode != 0:
+                print("ERROR: CMake build failed")
+                return False
+
+            print("Build completed successfully")
+            return True
+
+        except Exception as e:  # e.g. cmake not found on PATH
+            print(f"ERROR: Build failed with exception: {e}")
+            return False
+
+    def _resolve_binary_path(self, binary, test_config):
+        """
+        Work out which filesystem path a configured test binary refers to
+
+        `binary` has environment variables and ~ expanded when it is
+        absolute or contains '$' or '~'. os.path.expanduser only acts on a
+        leading ~, so a ~ elsewhere is left alone. The result is then
+        chosen in this order:
+        1. The (expanded) value itself, when it is an absolute path
+        2. test_config["test_binary_dir"]/<binary>, when that entry is non-empty
+        3. self.paths["test_binary_dir"]/<binary>, when that key is present
+        4. <self.build_dir>/test/<binary>
+
+        Both test_binary_dir values get the same $VAR and ~ expansion.
+
+        Args:
+            binary: Binary name or path from config
+            test_config: Test configuration dict
+
+        Returns:
+            str: Resolved path to the binary; its existence is not checked and
+                it may be relative when the chosen base directory is relative
+        """
+        def expand(path):
+            # Variables first, then the home-directory shorthand
+            return os.path.expanduser(os.path.expandvars(path))
+
+        # Rule 1: absolute paths (before or after expansion) are used as-is
+        needs_expansion = os.path.isabs(binary) or '$' in binary or '~' in binary
+        if needs_expansion:
+            binary = expand(binary)
+            if os.path.isabs(binary):
+                return binary
+            # Still relative after expansion: anchor it to a directory below
+
+        # Rule 2: directory override carried by the test entry itself
+        per_test_dir = test_config.get("test_binary_dir", "")
+        if per_test_dir:
+            return os.path.join(expand(per_test_dir), binary)
+
+        # Rule 3: directory override from self.paths; unlike rule 2 an empty
+        # value is still used, which leaves the binary name unanchored
+        if "test_binary_dir" in self.paths:
+            shared_dir = self.paths["test_binary_dir"]
+            return os.path.join(expand(shared_dir), binary)
+
+        # Rule 4: default location inside the build tree
+        return os.path.join(self.build_dir, "test", binary)
+
+    def run_test(self, test_config, suite_config):
+        """
+        Run a single test and classify its outcome
+
+        The binary is invoked through the path returned by
+        _resolve_binary_path, so absolute paths and test_binary_dir overrides
+        execute the same file whose existence was checked. The command runs
+        from <build_dir>/test, or from the binary's directory if that is missing.
+
+        Keys read from test_config (defaults in parentheses): name,
+        description (""), binary ("rccl-UnitTestsMPI"), is_gtest (True),
+        test_filter ("*"), num_ranks (1), num_nodes (1), num_gpus (8),
+        timeout (0 = unlimited), env_variables ({}), command_args ("").
+        num_ranks == 1 runs the binary directly; anything else goes via mpirun.
+
+        Outcome: exit code 0 is PASSED, 124 or an expired timeout is TIMEOUT,
+        every other exit code or launch error is FAILED.
+
+        Args:
+            test_config: Test configuration dict
+            suite_config: Test suite configuration dict
+
+        Returns:
+            dict: "name", "result" and "duration", plus "exit_code" when the
+                process ran to completion or "error" otherwise
+        """
+        import shlex  # function-scope import; used only to quote the binary path
+
+        test_name = test_config.get("name")
+        is_gtest = test_config.get("is_gtest", True)  # Default to True for backward compatibility
+        description = test_config.get("description", "")
+        binary = test_config.get("binary", "rccl-UnitTestsMPI")
+
+        # Use test_filter for all test types
+        test_filter = test_config.get("test_filter", "*")
+
+        num_ranks = test_config.get("num_ranks", 1)
+        num_nodes = test_config.get("num_nodes", 1)
+        num_gpus = test_config.get("num_gpus", 8)  # GPUs per node (default: 8)
+        timeout = test_config.get("timeout", 0)
+        env_vars = test_config.get("env_variables", {})
+
+        # Support custom command arguments for non-gtest or specialized tests
+        custom_args = test_config.get("command_args", "")
+
+        # Merge environment variables; later sources win (global < suite < test)
+        merged_env = {
+            **self.global_env,
+            **suite_config.get("env_variables", {}),
+            **env_vars
+        }
+
+        if self.args.verbose:
+            print(f"\n{'='*80}")
+            print(f"Test: {test_name}")
+            print(f"{'='*80}")
+            if description:
+                print(f"  Description: {description}")
+            print(f"  Type:    {'gtest' if is_gtest else 'non-gtest'}")
+            print(f"  Binary:  {binary}")
+            print(f"  Filter:  {test_filter}")
+            print(f"  Ranks:   {num_ranks}")
+            print(f"  Nodes:   {num_nodes}")
+            print(f"  GPUs/node: {num_gpus}")
+            print(f"  Timeout: {timeout if timeout > 0 else 'unlimited'}")
+            print(f"  Started: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+
+        # Resolve binary path using flexible strategies
+        test_binary_path = self._resolve_binary_path(binary, test_config)
+
+        if self.args.verbose:
+            print(f"  Binary path: {test_binary_path}")
+
+        if not os.path.isfile(test_binary_path):
+            print(f"ERROR: Test binary not found: {test_binary_path}")
+            return {
+                "name": test_name,
+                "result": TestResult.RESULT_FAILED.value,
+                "duration": 0,
+                "error": f"Binary not found: {test_binary_path}"
+            }
+
+        # Execute exactly the file validated above. The path is made absolute
+        # (and shell-quoted) because the command runs from run_dir, not from
+        # the runner's current directory.
+        binary_abs = os.path.abspath(test_binary_path)
+        binary_cmd = shlex.quote(binary_abs)
+        run_dir = os.path.join(self.build_dir, "test")
+        if not os.path.isdir(run_dir):
+            # e.g. custom test_binary_dir without a local build tree
+            run_dir = os.path.dirname(binary_abs)
+
+        # Setup environment
+        env = os.environ.copy()
+
+        # Build LD_LIBRARY_PATH with build dir and MPI lib (if available)
+        mpi_path = self.paths.get("mpi_path", "")
+        ld_library_path_parts = [self.build_dir]
+        if mpi_path:
+            ld_library_path_parts.append(os.path.join(mpi_path, "lib"))
+        if env.get('LD_LIBRARY_PATH'):
+            ld_library_path_parts.append(env.get('LD_LIBRARY_PATH'))
+        env['LD_LIBRARY_PATH'] = ":".join(ld_library_path_parts)
+
+        # Set LLVM_PROFILE_FILE for code coverage (prevents default.profraw collision)
+        env['LLVM_PROFILE_FILE'] = "rccl_tests_%p_%m.profraw"
+
+        # Add test-specific env vars
+        for key, value in merged_env.items():
+            env[key] = str(value)
+
+        # Arguments that follow the binary: a gtest filter (omitted for
+        # "ALL"/"*", i.e. no filtering) and then any custom arguments
+        test_args = ""
+        if is_gtest and test_filter not in ("ALL", "*"):
+            test_args += f" --gtest_filter={test_filter}"
+        if custom_args:
+            test_args += f" {custom_args}"
+
+        # Build command based on test type
+        if num_ranks == 1:
+            # Non-MPI test - prepend environment variables to the command
+            # NOTE: values are interpolated unquoted (as in the MPI branch), so
+            # the shell still expands any $VAR they contain
+            env_prefix = "".join(f"{key}={value} " for key, value in merged_env.items())
+            cmd = f"{env_prefix}{binary_cmd}{test_args}"
+
+        else:
+            # MPI test
+            mpi_cmd = f"{mpi_path}/bin/mpirun" if mpi_path else "mpirun"
+
+            # Use cached hostfile detected during initialization
+            hostfile = self.mpi_hostfile
+
+            # Warn if multi-node test without hostfile
+            if hostfile is None and num_nodes > 1:
+                print("WARNING: Multi-node test without hostfile")
+
+            hostfile_arg = f"--hostfile {hostfile} " if hostfile else ""
+
+            # Determine mapping strategy based on num_gpus and num_nodes
+            # Use PPR (processes per resource) to place num_gpus ranks per node
+            # This ignores the slots specification in the hostfile
+            if num_nodes > 1:
+                # Multi-node test: use ppr to control ranks per node
+                map_by_arg = f"--map-by ppr:{num_gpus}:node "
+            else:
+                # Single node: use default mapping (no need for ppr)
+                map_by_arg = ""
+
+            mpi_args = (
+                f"-np {num_ranks} "
+                f"{hostfile_arg}"
+                f"{map_by_arg}"
+                f"--mca btl ^vader,openib "
+                f"--mca pml ucx "
+                f"--bind-to none"
+            )
+
+            # Add environment variables for MPI
+            for key, value in merged_env.items():
+                mpi_args += f" -x {key}={value}"
+
+            # Pass the LD_LIBRARY_PATH
+            mpi_args += f" -x LD_LIBRARY_PATH={env['LD_LIBRARY_PATH']}"
+
+            # Pass LLVM_PROFILE_FILE to MPI ranks for code coverage (prevents default.profraw collision)
+            mpi_args += " -x LLVM_PROFILE_FILE=rccl_tests_%p_%m.profraw"
+
+            # Same binary path and trailing arguments as the single-process form
+            cmd = f"{mpi_cmd} {mpi_args} {binary_cmd}{test_args}"
+
+        if self.args.verbose:
+            print(f"\n  Command: {cmd}")
+            print(f"  Working directory: {run_dir}")
+            print(f"  LD_LIBRARY_PATH: {env.get('LD_LIBRARY_PATH', '')}")
+            print(f"  LLVM_PROFILE_FILE: {env.get('LLVM_PROFILE_FILE', 'Not set')}\n")
+
+        # Execute test
+        start_time = time.time()
+        try:
+            # timeout=None is subprocess.run's default and means no limit
+            result = subprocess.run(
+                cmd,
+                shell=True,
+                cwd=run_dir,
+                env=env,
+                capture_output=False,
+                timeout=timeout if timeout > 0 else None
+            )
+
+            duration = time.time() - start_time
+
+            # Determine result
+            if result.returncode == ExitCode.EXIT_SUCCESS:
+                test_result = TestResult.RESULT_PASSED.value
+            elif result.returncode == ExitCode.EXIT_TIMEOUT:
+                test_result = TestResult.RESULT_TIMEOUT.value
+            else:
+                test_result = TestResult.RESULT_FAILED.value
+
+            if self.args.verbose:
+                print(f"\n  Result: {test_result} ({duration:.3f} seconds)")
+
+            return {
+                "name": test_name,
+                "result": test_result,
+                "duration": duration,
+                "exit_code": result.returncode
+            }
+
+        except subprocess.TimeoutExpired:
+            # subprocess.run kills the shell it spawned before raising; with
+            # shell=True, processes that shell started may outlive it
+            duration = time.time() - start_time
+            if self.args.verbose:
+                print(f"\n  Result: {TestResult.RESULT_TIMEOUT.value} after {timeout} seconds")
+            return {
+                "name": test_name,
+                "result": TestResult.RESULT_TIMEOUT.value,
+                "duration": duration,
+                "error": f"Test timed out after {timeout} seconds"
+            }
+        except Exception as e:
+            # Launch problems (e.g. an unusable working directory) count as failures
+            duration = time.time() - start_time
+            print(f"\n  ERROR: {e}")
+            return {
+                "name": test_name,
+                "result": TestResult.RESULT_FAILED.value,
+                "duration": duration,
+                "error": str(e)
+            }
+
+    def run_test_suite(self, suite_config):
+        """
+        Execute the tests of one suite and record their outcomes
+
+        When args.test_name is set, only tests with exactly that name run. Each
+        executed test is also appended to the executor-wide tracking lists.
+
+        Args:
+            suite_config: Test suite configuration dict
+        Returns:
+            list: Result dicts of the tests that ran, in execution order
+        """
+        verbose = self.args.verbose
+        suite_name = suite_config["suite_details"]["name"]
+
+        if verbose:
+            print(f"\n{'='*80}")
+            print(f"TEST SUITE: {suite_name}")
+            print(f"{'='*80}")
+
+        tests = suite_config.get("tests", [])
+        if not tests:
+            if verbose:
+                print(f"WARNING: No tests defined for test suite '{suite_name}'")
+            return []
+
+        wanted = self.args.test_name
+        results = []
+        for test in tests:
+            name = test.get("name")
+            if wanted and name != wanted:
+                continue  # filtered out by the requested test name
+            outcome = self.run_test(test, suite_config)
+            results.append(outcome)
+            self.test_names.append(name)
+            self.test_results.append(outcome["result"])
+            self.test_durations.append(outcome["duration"])
+            self.test_suites.append(suite_name)
+        return results
+
+    def print_summary(self):
+        """
+        Print a per-test table and pass/fail/timeout totals (no output if no tests ran)
+        """
+        total_tests = len(self.test_results)
+        if total_tests == 0:
+            return
+
+        passed = self.test_results.count(TestResult.RESULT_PASSED.value)
+        failed = self.test_results.count(TestResult.RESULT_FAILED.value)
+        timeout = self.test_results.count(TestResult.RESULT_TIMEOUT.value)
+
+        rule = "-"*120
+        print("\nDetailed Results:")
+        print(rule)
+        print(f"{'Test Suite':<40} {'Test Name':<40} {'Result':<10} {'Duration'}")
+        print(rule)
+        # The four tracking lists are appended together in run_test_suite, so
+        # zip walks them row by row without manual indexing
+        rows = zip(self.test_suites, self.test_names, self.test_results, self.test_durations)
+        for suite, name, result, duration in rows:
+            print(f"{suite:<40} {name:<40} {result:<10} {duration:.3f} seconds")
+        print(rule)
+        print(f"Total Tests:   {total_tests}")
+        print(f"Passed:        {passed}")
+        print(f"Failed:        {failed}")
+        print(f"Timeout:       {timeout}")
+        print("="*120)
+
+    def generate_coverage_report(self):
+        """Merge collected .profraw data and write HTML and text coverage reports (no-op unless args.coverage_report)"""
+        if not self.args.coverage_report:
+            return
+
+        print(f"\n{'='*80}")
+        print("GENERATING COVERAGE REPORT")
+        print(f"{'='*80}")
+
+        # Function-scope imports: only needed once a report is actually requested
+        import glob
+        import shutil
+
+        profraw_files = glob.glob(os.path.join(self.build_dir, "**/*.profraw"), recursive=True)
+
+        if not profraw_files:
+            print("WARNING: No profraw files found. Cannot generate coverage report.")
+            return
+
+        print(f"Found {len(profraw_files)} profraw files")
+
+        os.makedirs(self.report_dir, exist_ok=True)
+
+        # Create rawfiles directory
+        rawfiles_dir = os.path.join(self.log_dir, "rawfiles")
+        os.makedirs(rawfiles_dir, exist_ok=True)
+
+        # Copy all profraw files into a single location (originals stay in place)
+        print("Copying profraw files...")
+        for profraw in profraw_files:
+            shutil.copy(profraw, rawfiles_dir)  # equal basenames from different dirs overwrite
+
+        # Create a list of raw files to merge
+        rawprofiles_list = os.path.join(self.log_dir, "rawprofiles.list")
+        with open(rawprofiles_list, 'w') as f:
+            for profraw in glob.glob(os.path.join(rawfiles_dir, "*.profraw")):
+                f.write(f"{profraw}\n")
+
+        # Get ROCm path for LLVM tools
+        rocm_path = self.paths.get("rocm_path", "/opt/rocm")
+        llvm_profdata = os.path.join(rocm_path, "lib", "llvm", "bin", "llvm-profdata")
+        llvm_cov = os.path.join(rocm_path, "lib", "llvm", "bin", "llvm-cov")
+
+        # Create the merged profdata
+        print("Merging profraw files...")
+        merged_profdata = os.path.join(self.log_dir, "merged.profdata")
+
+        merge_cmd = [
+            llvm_profdata,
+            "merge",
+            "--sparse",
+            f"--input-files={rawprofiles_list}",
+            f"--output={merged_profdata}"
+        ]
+
+        if self.args.verbose:
+            print(f"Merge command: {' '.join(merge_cmd)}")
+
+        try:
+            result = subprocess.run(  # result is unused; check=True raises on failure
+                merge_cmd,
+                capture_output=True,
+                text=True,
+                check=True
+            )
+            print("Profraw files merged successfully")
+            if self.args.verbose:
+                print(f"Merged profdata file: {merged_profdata}")
+        except subprocess.CalledProcessError as e:
+            print(f"ERROR: Failed to merge profraw files")
+            print(f"Command: {' '.join(merge_cmd)}")
+            print(f"Error: {e.stderr}")
+            return
+
+        # Build list of object files as alternating "--object", <path> entries
+        object_files = []
+
+        librccl_so = os.path.join(self.build_dir, "librccl.so")
+        if os.path.isfile(librccl_so):
+            object_files.extend(["--object", librccl_so])
+            if self.args.verbose:
+                print(f"Found library: {librccl_so}")
+
+        # Add test binaries (only this fixed set of names, and only those that exist)
+        test_dir = os.path.join(self.build_dir, "test")
+        for binary in ["rccl-UnitTestsFixtures", "rccl-UnitTests", "rccl-UnitTestsMPI"]:
+            binary_path = os.path.join(test_dir, binary)
+            if os.path.isfile(binary_path):
+                object_files.extend(["--object", binary_path])
+                if self.args.verbose:
+                    print(f"Found binary: {binary_path}")
+
+        if not object_files:
+            print("WARNING: No object files found for coverage report")
+            return
+
+        if self.args.verbose:
+            print(f"Total object files for coverage: {len(object_files) // 2}")  # two entries per file
+
+        # Ignore patterns for non-relevant files
+        ignore_regex = (
+            ".*tuner_v.*|.*profiler_v.*|.*net_v.*|.*_deps.*|ext.*|"
+            ".*coll_net.*|.*nvls.*|.*nvml.*|.*nvtx.*|test/|.*gtest.*"
+        )
+
+        # Create the HTML report
+        print("Generating HTML coverage report...")
+        html_cmd = [
+            llvm_cov,
+            "show",
+            f"--instr-profile={merged_profdata}",
+            "--format=html",
+            "--Xdemangler=c++filt",
+            f"--output-dir={self.report_dir}",
+            "--project-title=RCCL_Lib_Coverage_Report",
+            f"--ignore-filename-regex={ignore_regex}"
+        ]
+        html_cmd.extend(object_files)
+
+        if self.args.verbose:
+            print(f"HTML coverage command: {' '.join(html_cmd)}")
+
+        try:
+            result = subprocess.run(
+                html_cmd,
+                capture_output=True,
+                text=True,
+                check=True
+            )
+            print(f"HTML coverage report generated: {self.report_dir}/index.html")
+        except subprocess.CalledProcessError as e:
+            # Unlike a merge failure this does not return: the text report is still attempted
+            print(f"ERROR: Failed to generate HTML coverage report")
+            print(f"Error: {e.stderr}")
+            if self.args.verbose:
+                print(f"Command was: {' '.join(html_cmd)}")
+
+        # Generate function coverage summary (text report)
+        print("Generating text coverage report...")
+        text_report = os.path.join(self.report_dir, "function_coverage_report.txt")
+
+        # Build command matching bash script exactly
+        text_cmd = [
+            llvm_cov,
+            "report",
+            f"--instr-profile={merged_profdata}",
+            "--Xdemangler=c++filt"
+        ]
+        # Add object files first
+        text_cmd.extend(object_files)
+        # Add remaining options - matching bash script order
+        text_cmd.extend([
+            f"--ignore-filename-regex={ignore_regex}",
+            "--show-functions",
+            "--sources",
+            self.build_dir
+        ])
+
+        if self.args.verbose:
+            print(f"Text coverage command: {' '.join(text_cmd)}")
+
+        try:
+            with open(text_report, 'w') as f:  # the report is the command's stdout
+                result = subprocess.run(
+                    text_cmd,
+                    stdout=f,
+                    stderr=subprocess.PIPE,
+                    text=True,
+                    check=True
+                )
+            print(f"Function coverage report generated: {text_report}")
+
+        except subprocess.CalledProcessError as e:
+            print(f"ERROR: Failed to generate text coverage report")
+            print(f"Error: {e.stderr}")
+            if self.args.verbose:
+                print(f"Command was: {' '.join(text_cmd)}")
+
+        print(f"\n{'='*80}")  # reached even when the HTML or text step failed above
+        print("COVERAGE REPORT GENERATION COMPLETE")
+        print(f"{'='*80}")
+        print(f"Report directory: {self.report_dir}")
+        print(f"HTML report: {self.report_dir}/index.html")
+        print(f"Text report: {text_report}")
+
diff --git a/projects/rccl/tools/scripts/test_runner/lib/test_parser.py b/projects/rccl/tools/scripts/test_runner/lib/test_parser.py
new file mode 100644
index 0000000000..62f0b43f12
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/lib/test_parser.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
+#
+# See LICENSE.txt for license information
+"""
+Test Parser Module
+Handles command-line argument parsing and test output parsing
+"""
+
+import re
+import argparse
+
+
+class ArgumentParserInterface:
+    """Command-line argument parser for the RCCL test runner.
+
+    ``process_arguments()`` is the usual entry point: it registers the
+    options, parses them and, in verbose mode, prints the resulting settings.
+    """
+
+    def __init__(self):
+        # Set by add_arguments() so a repeated call does not re-register options
+        # (argparse raises ArgumentError on conflicting option strings).
+        self._arguments_added = False
+        self.parser = argparse.ArgumentParser(
+            description="RCCL Test Runner - Execute and manage RCCL unit tests and MPI tests",
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            epilog="""
+Examples:
+  # Run all tests from config
+  %(prog)s -c test_config.json
+
+  # Run specific test
+  %(prog)s -c test_config.json --test-name NET_AllTests_2Nodes_ETH
+
+  # Run with verbose output
+  %(prog)s -c test_config.json -v
+
+  # Skip build and use existing build
+  %(prog)s -c test_config.json --no-build
+
+  # Generate coverage report from existing data
+  %(prog)s -c test_config.json --no-build --skip-tests --coverage-report
+            """
+        )
+
+    def add_arguments(self):
+        """Register all command-line options; repeated calls are a no-op."""
+        if self._arguments_added:
+            return
+        self._arguments_added = True
+        # Local alias keeps the option table below compact
+        add = self.parser.add_argument
+        add('-c', '--config', type=str, required=True,
+            help="Test configuration file (JSON format)")
+        add('-v', '--verbose', action='store_true',
+            help="Enable verbose output (detailed logging)")
+        add('-o', '--output', type=str,
+            help="Output directory for logs and reports (default: auto-generated)")
+        add('--test-name', type=str,
+            help="Run only specific test by name")
+        add('--no-build', action='store_true',
+            help="Skip build step and use existing build artifacts")
+        add('--skip-tests', action='store_true',
+            help="Skip test execution (useful with --coverage-report)")
+        add('--coverage-report', action='store_true',
+            help="Generate code coverage report from profraw files")
+        add('--overwrite', action='store_true',
+            help="Overwrite previous build/log directories (default: append timestamp)")
+        add('--report-suffix', type=str, default='',
+            help="Suffix for report directory name (default: blank)")
+
+    def parse_arguments(self, argv=None):
+        """Parse command-line arguments.
+
+        Args:
+            argv: Optional list of argument strings; ``None`` (the default)
+                makes argparse read ``sys.argv[1:]``.
+
+        Returns:
+            argparse.Namespace: The parsed options.
+        """
+        return self.parser.parse_args(argv)
+
+    def process_arguments(self, argv=None):
+        """Register the options, parse ``argv`` and echo the result.
+
+        Args:
+            argv: Optional list of argument strings, passed through to
+                ``parse_arguments``.
+
+        Returns:
+            argparse.Namespace: The parsed options.
+        """
+        self.add_arguments()
+        args = self.parse_arguments(argv)
+        self.handle_arguments(args)
+        return args
+
+    def handle_arguments(self, args):
+        """Print the parsed settings when ``args.verbose`` is set."""
+        if not args.verbose:
+            return
+        print("="*80)
+        print("RCCL Test Runner - Configuration")
+        print("="*80)
+        print(f"Config file:       {args.config}")
+        print(f"Verbose mode:      {args.verbose}")
+        print(f"Output dir:        {args.output if args.output else 'auto-generated'}")
+        print(f"Test name filter:  {args.test_name if args.test_name else 'all tests'}")
+        print(f"No build:          {args.no_build}")
+        print(f"Skip tests:        {args.skip_tests}")
+        print(f"Coverage report:   {args.coverage_report}")
+        print(f"Overwrite:         {args.overwrite}")
+        print(f"Report suffix:     {args.report_suffix}")
+        print("="*80)
+        print()
+
+
+def parse_test_output(output):
+    """
+    Parse Google Test style output and extract summary results.
+
+    Args:
+        output: String containing test output
+
+    Returns:
+        dict: 'passed'/'failed'/'skipped' flags, 'tests_run'/'tests_passed'/
+            'tests_failed' counts and 'errors' (at most 10 matching lines)
+    """
+    results = {
+        'passed': False,
+        'failed': False,
+        'skipped': False,
+        'tests_run': 0,
+        'tests_passed': 0,
+        'tests_failed': 0,
+        'errors': []
+    }
+
+    # Google Test summary lines, e.g. "[  PASSED  ] 3 tests."
+    gtest_passed = re.search(r'\[\s*PASSED\s*\]\s*(\d+)\s*test', output)
+    gtest_failed = re.search(r'\[\s*FAILED\s*\]\s*(\d+)\s*test', output)
+    gtest_run = re.search(r'\[==========\]\s*(\d+)\s*test.*ran', output)
+
+    if gtest_run:
+        results['tests_run'] = int(gtest_run.group(1))
+    if gtest_passed:
+        results['tests_passed'] = int(gtest_passed.group(1))
+    if gtest_failed:
+        results['tests_failed'] = int(gtest_failed.group(1))
+        results['failed'] = True
+    else:
+        # No failure summary: passed only if at least one test actually ran
+        results['passed'] = results['tests_run'] > 0
+
+    # Check for skipped tests
+    results['skipped'] = 'SKIPPED' in output or 'Skipped' in output
+
+    # Collect whole matching lines. The group is non-capturing on purpose:
+    # findall() with a capturing group would return only the bare keyword.
+    error_pattern = re.compile(r'^.*(?:ERROR|FAILED|TIMEOUT).*$', re.MULTILINE)
+    errors = [line.strip() for line in error_pattern.findall(output)]
+    results['errors'] = errors[:10]  # Limit to first 10 errors
+
+    return results
+
diff --git a/projects/rccl/tools/scripts/test_runner/test_runner.py b/projects/rccl/tools/scripts/test_runner/test_runner.py
new file mode 100755
index 0000000000..7075e2686b
--- /dev/null
+++ b/projects/rccl/tools/scripts/test_runner/test_runner.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+RCCL Test Runner
+Main script for executing RCCL unit tests and MPI tests with hierarchical configuration
+"""
+
+import sys
+import os
+import json
+import logging
+
+from lib.test_parser import ArgumentParserInterface
+from lib.test_config import TestConfigProcessor
+from lib.test_executor import TestExecutor
+
+# Root logger setup: INFO level, "<timestamp> - <level> - <message>" records
+logging.basicConfig(
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    level=logging.INFO,
+)
+
+def main():
+    """Main entry point for test runner.
+
+    Returns:
+        int: Exit status - 0 on success, 130 on Ctrl-C, 1 on any other failure.
+    """
+
+    # Parse command-line arguments
+    parser_interface = ArgumentParserInterface()
+    args = parser_interface.process_arguments()
+
+    # Validate config file exists
+    if not os.path.exists(args.config):
+        print(f"ERROR: Configuration file not found: {args.config}")
+        if args.verbose:
+            print("Exiting: Missing configuration file")
+        return 1
+
+    try:
+        # Load and validate configuration
+        if args.verbose:
+            print("Loading configuration...")
+        config_processor = TestConfigProcessor(args.config)
+        config_processor.validate_config()
+
+        # Create test executor
+        executor = TestExecutor(config_processor, args)
+
+        # Check environment
+        if not executor.check_environment():
+            if args.verbose:
+                print("Exiting: Environment check failed")
+            return 1
+
+        # Build RCCL (if not --no-build)
+        if not args.no_build and not executor.build_rccl():
+            print("ERROR: Build failed")
+            if args.verbose:
+                print("Exiting: RCCL build failed")
+            return 1
+
+        # Parse and run test suites
+        if not args.skip_tests:
+            if args.verbose:
+                print("\nParsing test suites...")
+            test_suites = config_processor.parse_test_suites()
+
+            if args.verbose:
+                print("\nCombined Test Suites (JSON):")
+                print(json.dumps(test_suites, indent=2))
+                print()
+                print(f"Found {len(test_suites)} test suite(s)")
+
+            # Print skip messages for disabled test suites upfront
+            print()
+            for suite in test_suites:
+                suite_name = suite["suite_details"]["name"]
+                enabled = suite["suite_details"].get("enabled", True)
+                if not enabled:
+                    print(f"SKIP: Test suite '{suite_name}' is disabled")
+
+            # Run only enabled test suites; outcomes are read from executor.test_results
+            for suite in test_suites:
+                if suite["suite_details"].get("enabled", True):
+                    executor.run_test_suite(suite)
+
+            # Print summary once at the end
+            executor.print_summary()
+
+        # Generate coverage report
+        executor.generate_coverage_report()
+
+        # Return based on results
+        if executor.test_results:
+            from lib.test_executor import TestResult
+            failed = executor.test_results.count(TestResult.RESULT_FAILED.value)
+            timeout = executor.test_results.count(TestResult.RESULT_TIMEOUT.value)
+            if failed > 0 or timeout > 0:
+                if args.verbose:
+                    print(f"Exiting: Tests failed (failed={failed}, timeout={timeout})")
+                return 1
+
+        if args.verbose:
+            print("Exiting: Test run completed successfully")
+        return 0
+
+    except KeyboardInterrupt:
+        print("\n\nInterrupted by user")
+        if args.verbose:
+            print("Exiting: User interrupted execution")
+        return 130  # 128 + SIGINT, the conventional shell status for Ctrl-C
+    except Exception as e:
+        print(f"\nERROR: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+            print("Exiting: Unhandled exception occurred")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # propagate the status so CI sees failed runs
+