[rocprofiler-systems] - Implement subset of CTests into PyTests (#2666)

Convert a subset of the ctest to pytest to be used in TheRock CI. Create a new cmake flag `ROCPROFSYS_INSTALL_TESTING` to control test suite installation. - pytest package will be installed to share/rocprofiler-systems/tests - all compiled examples are put in share/rocprofiler-systems/examples - all test relevant scripts are put in share/rocprofiler-systems/tests - see README.md in share/rocprofiler-systems/tests
2026-01-26 23:10:01 -05:00
@@ -37,6 +37,10 @@
 # Python cache files
 *.pyc

+# Python virtual environments
+venv*
+.venv*
+
 # Documentation artifacts
 /_build
 _toc.yml
@@ -1,3 +1,6 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
 cmake_minimum_required(VERSION 3.21 FATAL_ERROR)

 if(
@@ -153,27 +156,60 @@ if(CI_BUILD)
    )
 else()
    rocprofiler_systems_add_option(ROCPROFSYS_BUILD_CI "Enable internal asserts, etc."
-                                   OFF ADVANCED NO_FEATURE
+    OFF ADVANCED NO_FEATURE
    )
    rocprofiler_systems_add_option(ROCPROFSYS_BUILD_EXAMPLES
-                                   "Enable building the examples" OFF ADVANCED
+    "Enable building the examples" OFF ADVANCED
+    )
+    rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_EXAMPLES
+    "Install the examples" OFF
    )
    rocprofiler_systems_add_option(ROCPROFSYS_BUILD_TESTING
-                                   "Enable building the testing suite" OFF ADVANCED
+    "Enable building the testing suite" OFF ADVANCED
+    )
+    rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_TESTING
+    "Install the test suite" OFF
    )
    rocprofiler_systems_add_option(
-        ROCPROFSYS_BUILD_DEBUG "Enable building with extensive debug symbols" OFF
-        ADVANCED
+    ROCPROFSYS_BUILD_DEBUG "Enable building with extensive debug symbols" OFF
+    ADVANCED
    )
    rocprofiler_systems_add_option(
-        ROCPROFSYS_BUILD_HIDDEN_VISIBILITY
-        "Build with hidden visibility (disable for Debug builds)" ON ADVANCED
+    ROCPROFSYS_BUILD_HIDDEN_VISIBILITY
+    "Build with hidden visibility (disable for Debug builds)" ON ADVANCED
    )
    rocprofiler_systems_add_option(ROCPROFSYS_STRIP_LIBRARIES "Strip the libraries"
-                                   ${_STRIP_LIBRARIES_DEFAULT} ADVANCED
+    ${_STRIP_LIBRARIES_DEFAULT} ADVANCED
    )
 endif()

+# TheRock integration switch. When enabled, the pytest suite is turned on, its
+# installation is forced, and "lulesh" is added to ROCPROFSYS_DISABLE_EXAMPLES.
+rocprofiler_systems_add_option(
+    ROCPROFSYS_BUILD_FOR_THEROCK "Build rocprofiler-systems for use with TheRock" OFF
+    ADVANCED NO_FEATURE
+)
+
+if(ROCPROFSYS_BUILD_FOR_THEROCK)
+    set(ROCPROFSYS_USE_PYTESTS ON CACHE BOOL "Enable pytest suite" FORCE)
+    set(ROCPROFSYS_INSTALL_TESTING
+        ON
+        CACHE BOOL
+        "Install testing scripts and pytest package"
+        FORCE
+    )
+
+    # Lulesh does not build with TheRock, so make sure it is always listed as disabled
+    if(DEFINED ROCPROFSYS_DISABLE_EXAMPLES)
+        # keep whatever was already requested and add lulesh only when it is missing
+        if(NOT "lulesh" IN_LIST ROCPROFSYS_DISABLE_EXAMPLES)
+            list(APPEND ROCPROFSYS_DISABLE_EXAMPLES "lulesh")
+        endif()
+    else()
+        set(ROCPROFSYS_DISABLE_EXAMPLES "lulesh" CACHE STRING "Disable building examples" FORCE)
+    endif()
+endif()
+
 include(Compilers) # compiler identification
 include(BuildSettings) # compiler flags

@@ -267,6 +303,15 @@ elseif("$ENV{ROCPROFSYS_CI}")
    endif()
 endif()

+if(ROCPROFSYS_INSTALL_TESTING)
+    set(ROCPROFSYS_INSTALL_EXAMPLES ON CACHE BOOL "Enable installing examples" FORCE)
+    set(ROCPROFSYS_BUILD_TESTING ON CACHE BOOL "Enable building the testing suite" FORCE)
+endif()
+
+if(ROCPROFSYS_INSTALL_EXAMPLES)
+    set(ROCPROFSYS_BUILD_EXAMPLES ON CACHE BOOL "Enable building the examples" FORCE)
+endif()
+
 if(ROCPROFSYS_BUILD_TESTING)
    set(ROCPROFSYS_BUILD_EXAMPLES ON CACHE BOOL "Enable building the examples" FORCE)
 endif()
@@ -642,7 +642,7 @@ function(CHECK_ROCMINFO _REGEX _RESULT_VARIABLE)
        set(_failure TRUE)
    endif()

-    if(DEFINED ARG_GET_OUTPUT)
+    if(ARG_GET_OUTPUT)
        if(NOT _failure)
            set(${_RESULT_VARIABLE} "${rocminfo_OUTPUT}" PARENT_SCOPE)
        else()
@@ -0,0 +1,19 @@
+# Requirements for rocprofiler-systems pytest test suite
+#
+# Install with:
+#   pip install -r requirements.txt
+
+# Core testing framework
+pytest>=7.4.0
+pytest-subtests>=0.10.0
+pytest-timeout>=2.0.0
+pytest-xdist>=3.0.0
+
+# Optional: Coverage reporting
+pytest-cov>=4.0.0
+
+# Perfetto trace processing (optional, for trace validation)
+perfetto>=0.7.0
+
+# Type checking support (optional)
+typing-extensions>=4.0.0
@@ -102,7 +102,21 @@ if [ "$(basename ${1})" = "cmake" ] && [ "${2}" = "-E" ] && [ "${3}" = "__run_co
    fi
 fi

-if [[ "${CXX_COMPILER}" != "${1}" ]]; then
+# Handle ccache wrapper (the actual compiler is the next argument after ccache).
+# CCACHE_PREFIX stays empty unless the first argument is a ccache executable.
+ACTUAL_COMPILER="${1}"
+CCACHE_PREFIX=""
+# quote ${1} so a compiler path containing whitespace is not word-split by basename
+if [ "$(basename "${1}")" = "ccache" ]; then
+    if [ -z "${2:-}" ]; then
+        echo -e "\nError: ${BASH_SOURCE[0]} detected 'ccache' as a compiler wrapper," >&2
+        echo "but no underlying compiler was specified. Usage: ccache <actual-compiler> [args...]" >&2
+        exit 1
+    fi
+    CCACHE_PREFIX="${1}"
+    ACTUAL_COMPILER="${2}"
+fi
+
+if [[ "${CXX_COMPILER}" != "${ACTUAL_COMPILER}" ]]; then
    debug-message $@
    # the command does not depend on rocprofiler-systems so just execute the command w/o re-directing to ${ROCPROFSYS_COMPILER}
    eval $@
@@ -113,6 +127,11 @@ else
        exit 1
    fi

+    # discard ccache if present
+    if [ -n "${CCACHE_PREFIX}" ]; then
+        shift
+    fi
+
    # discard the compiler from the command
    shift

@@ -123,7 +142,12 @@ else
        export LIBRARY_PATH=${LLVM_LIB_DIR}:${LIBRARY_PATH}
    fi

-    debug-message ${ROCPROFSYS_COMPILER} $@
-    # execute ${ROCPROFSYS_COMPILER} (again, usually nvcc_wrapper)
-    ${ROCPROFSYS_COMPILER} $@
+    # Use ccache with the rocprofiler-systems compiler if ccache was originally requested
+    if [ -n "${CCACHE_PREFIX}" ]; then
+        debug-message ${CCACHE_PREFIX} ${ROCPROFSYS_COMPILER} $@
+        ${CCACHE_PREFIX} ${ROCPROFSYS_COMPILER} $@
+    else
+        debug-message ${ROCPROFSYS_COMPILER} $@
+        ${ROCPROFSYS_COMPILER} $@
+    fi
 fi
@@ -1,81 +1,130 @@
-# MIT License
-#
-# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT

 #
 # rocprofiler-systems tests
 #
 include_guard(GLOBAL)

-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-testing.cmake)
+if(ROCPROFSYS_USE_PYTESTS)
+    include(${CMAKE_CURRENT_LIST_DIR}/pytest/CMakeLists.txt)
+else()
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-testing.cmake)

-# test groups
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-unit-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-config-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-instrument-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-pthread-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rocm-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-user-api-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-mpi-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-ucx-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-kokkos-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-openmp-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-code-coverage-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-fork-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-time-window-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-attach-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rccl-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-overflow-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-annotate-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-causal-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-python-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-decode-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-gpu-connect-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-nic-perf.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-roctx-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rocm-hip-stream.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-binary-tests.cmake)
-include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-thread-limit-tests.cmake)
+    # test groups
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-unit-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-config-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-instrument-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-pthread-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rocm-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-user-api-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-mpi-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-ucx-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-kokkos-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-openmp-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-code-coverage-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-fork-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-time-window-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-attach-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rccl-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-overflow-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-annotate-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-causal-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-python-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-decode-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-gpu-connect-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-nic-perf.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-roctx-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-rocm-hip-stream.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-binary-tests.cmake)
+    include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-thread-limit-tests.cmake)

-# -------------------------------------------------------------------------------------- #
+    # -------------------------------------------------------------------------------------- #
+    #
+    # Global cleanup test for temporary files
+    # This runs once after ALL tests complete to clean up trace cache temporary files
+    # Uses FIXTURES_CLEANUP to ensure it runs after all tests requiring the fixture
+    #
+    # -------------------------------------------------------------------------------------- #
+
+    #delete temp files created by rocprofiler-sys tests in /tmp owned by the current user. Always return success.
+    add_test(
+        NAME rocprofsys-cleanup-tmp-files
+        COMMAND
+            sh -c
+            "find /tmp -maxdepth 1 -user $(whoami) \\( -name 'buffered_storage*.bin' -o -name 'metadata*.json' \\) -delete 2>/dev/null || true"
+        WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+    )
+
+    set_tests_properties(
+        rocprofsys-cleanup-tmp-files
+        PROPERTIES
+            FIXTURES_CLEANUP rocprofsys-global-tmp-files
+            LABELS "cleanup;global"
+            TIMEOUT 30
+    )
+endif()
+
+# ------------------------------------------------------------------------------#
 #
-# Global cleanup test for temporary files
-# This runs once after ALL tests complete to clean up trace cache temporary files
-# Uses FIXTURES_CLEANUP to ensure it runs after all tests requiring the fixture
+# Move test files to build directory
 #
-# -------------------------------------------------------------------------------------- #
+# ------------------------------------------------------------------------------#

-#delete temp files created by rocprofiler-sys tests in /tmp owned by the current user. Always return success.
-add_test(
-    NAME rocprofsys-cleanup-tmp-files
+set(ROCPROFSYS_PYTHON_VALIDATION_FILES
+    ${CMAKE_CURRENT_LIST_DIR}/validate-causal-json.py
+    ${CMAKE_CURRENT_LIST_DIR}/validate-perfetto-proto.py
+    ${CMAKE_CURRENT_LIST_DIR}/validate-rocpd.py
+    ${CMAKE_CURRENT_LIST_DIR}/validate-timemory-json.py
+)
+
+set(ROCPROFSYS_TEST_SCRIPTS
+    ${CMAKE_CURRENT_LIST_DIR}/run-rocprof-sys-pid.sh
+    ${CMAKE_CURRENT_LIST_DIR}/get_default_nic.sh
+    ${CMAKE_CURRENT_LIST_DIR}/generate_papi_nic_events.sh
+)
+
+add_custom_target(
+    copy-test-files ALL
     COMMAND
-        sh -c
-        "find /tmp -maxdepth 1 -user $(whoami) \\( -name 'buffered_storage*.bin' -o -name 'metadata*.json' \\) -delete 2>/dev/null || true"
-    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+        ${CMAKE_COMMAND} -E make_directory
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_if_different ${ROCPROFSYS_PYTHON_VALIDATION_FILES}
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests
+    # copy_directory for directories (copy_if_different only works for files)
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_directory
+        ${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/rocpd-validation-rules
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_if_different ${ROCPROFSYS_TEST_SCRIPTS}
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests
+    VERBATIM
 )

-set_tests_properties(
-    rocprofsys-cleanup-tmp-files
-    PROPERTIES
-        FIXTURES_CLEANUP rocprofsys-global-tmp-files
-        LABELS "cleanup;global"
-        TIMEOUT 30
-)
+# ------------------------------------------------------------------------------#
+#
+# Pytests install
+#
+# ------------------------------------------------------------------------------#
+
+if(ROCPROFSYS_INSTALL_TESTING)
+    # Python Validation scripts
+    install(
+        PROGRAMS ${ROCPROFSYS_PYTHON_VALIDATION_FILES}
+        DESTINATION share/rocprofiler-systems/tests
+        COMPONENT rocprofiler-systems-tests
+    )
+    install(
+        DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules
+        DESTINATION share/rocprofiler-systems/tests
+        COMPONENT rocprofiler-systems-tests
+    )
+    # Scripts
+    install(
+        PROGRAMS ${ROCPROFSYS_TEST_SCRIPTS}
+        DESTINATION share/rocprofiler-systems/tests
+        COMPONENT rocprofiler-systems-tests
+    )
+endif()
@@ -0,0 +1,85 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+#
+# rocprofiler-systems pytest install
+#
+include_guard(GLOBAL)
+
+# ------------------------------------------------------------------------------#
+# Pytest specific
+# ------------------------------------------------------------------------------#
+
+set(ROCPROFSYS_PYTEST_PACKAGE_FILES
+    ${CMAKE_CURRENT_LIST_DIR}/rocprofsys/__init__.py
+    ${CMAKE_CURRENT_LIST_DIR}/rocprofsys/config.py
+    ${CMAKE_CURRENT_LIST_DIR}/rocprofsys/gpu.py
+    ${CMAKE_CURRENT_LIST_DIR}/rocprofsys/runners.py
+    ${CMAKE_CURRENT_LIST_DIR}/rocprofsys/validators.py
+)
+
+# Collect the test modules. CONFIGURE_DEPENDS makes the build re-check the glob, so
+# newly added test_*.py files are staged without a manual re-configure.
+file(GLOB ROCPROFSYS_PYTEST_TEST_FILES CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/test_*.py")
+set(ROCPROFSYS_PYTEST_FILES ${CMAKE_CURRENT_LIST_DIR}/conftest.py ${ROCPROFSYS_PYTEST_TEST_FILES})
+
+# Stage the rocprofsys package, the tests, requirements.txt and README.md in the build tree
+add_custom_target(
+    copy-pytest-files ALL
+    DEPENDS ${ROCPROFSYS_PYTEST_PACKAGE_FILES} ${ROCPROFSYS_PYTEST_FILES}
+    COMMAND
+        ${CMAKE_COMMAND} -E make_directory
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/pytest/rocprofsys
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_if_different ${ROCPROFSYS_PYTEST_PACKAGE_FILES}
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/pytest/rocprofsys/
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_if_different ${ROCPROFSYS_PYTEST_FILES}
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/pytest/
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/requirements.txt
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/
+    COMMAND
+        ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_LIST_DIR}/README.md
+        ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/
+    COMMENT "Copying pytest files to build directory"
+    VERBATIM
+)
+
+if(ROCPROFSYS_INSTALL_TESTING)
+    # In install mode, run build_standalone.sh without a packaging flag (it defaults to the Shiv .pyz)
+    add_custom_command(
+        OUTPUT ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/rocprofsys-tests.pyz
+        COMMAND
+            ${CMAKE_CURRENT_LIST_DIR}/build_standalone.sh --output-dir
+            ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests
+        DEPENDS
+            ${CMAKE_CURRENT_LIST_DIR}/build_standalone.sh
+            ${ROCPROFSYS_PYTEST_PACKAGE_FILES}
+            ${ROCPROFSYS_PYTEST_FILES}
+        COMMENT "Building standalone pytest binary"
+        VERBATIM
+    )
+
+    add_custom_target(
+        build-standalone-pytest
+        ALL
+        DEPENDS ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/rocprofsys-tests.pyz
+    )
+
+    install(
+        FILES ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/rocprofsys-tests.pyz
+        DESTINATION share/rocprofiler-systems/tests
+        COMPONENT rocprofiler-systems-tests
+    )
+    install(
+        FILES ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/requirements.txt
+        DESTINATION share/rocprofiler-systems/tests
+        COMPONENT rocprofiler-systems-tests
+    )
+    install(
+        FILES ${CMAKE_BINARY_DIR}/share/rocprofiler-systems/tests/README.md
+        DESTINATION share/rocprofiler-systems/tests
+        COMPONENT rocprofiler-systems-tests
+    )
+endif()
@@ -0,0 +1,149 @@
+# rocprofiler-systems Pytest Suite
+
+## General Use
+
+### Setup
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+### Running Tests
+
+Tests can run in two modes: **build** or **install**.
+
+#### Build Mode (Default)
+
+Runs tests using binaries from your build directory.
+
+```bash
+cd <path to rocprofiler-systems>
+pytest <build-dir>/share/rocprofiler-systems/tests/pytest/
+```
+
+Default output directory: `<build-dir>/rocprof-sys-pytest-output/`
+
+If auto detection of the build directory fails, specify `ROCPROFSYS_BUILD_DIR=<path to build-dir>`
+
+#### Install Mode
+
+Runs tests using binaries from your install location.
+
+```bash
+ROCPROFSYS_INSTALL_DIR=<install prefix> pytest <build-dir>/share/rocprofiler-systems/tests/pytest/
+
+# Using /opt/rocprofiler-systems
+ROCPROFSYS_INSTALL_DIR=/opt/rocprofiler-systems pytest <build-dir>/share/rocprofiler-systems/tests/pytest/
+```
+
+Default output directory: `/tmp/$USER/rocprof-sys-pytest-output/`
+
+> **Note:** Install mode requires `ROCPROFSYS_INSTALL_TESTING=ON` during build.
+
+#### Using the Standalone Package
+
+A standalone `.pyz` package is included at `<install-dir>/share/rocprofiler-systems/tests/rocprofsys-tests.pyz`. This can be run directly with Python:
+
+```bash
+python3 <install-dir>/share/rocprofiler-systems/tests/rocprofsys-tests.pyz
+```
+
+All standard pytest flags work with the standalone package.
+
+### Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `ROCPROFSYS_BUILD_DIR` | Path to build directory | Auto-detected |
+| `ROCPROFSYS_INSTALL_DIR` | Path to install prefix (enables install mode) | Not set |
+| `ROCPROFSYS_SOURCE_DIR` | Path to source directory | Auto-detected |
+| `ROCPROFSYS_KEEP_TEST_OUTPUT` | Keep test output on success (`ON`/`OFF`) | `ON` |
+| `ROCPROFSYS_USE_ROCPD` | Enable/disable ROCpd validation (`ON`/`OFF`) | `ON` if available |
+| `ROCPROFSYS_VALIDATE_PERFETTO` | Enable/disable Perfetto validation (`ON`/`OFF`) | `ON` if available |
+| `ROCPROFSYS_TRACE_PROCESSOR_SHELL` | Path to trace_processor_shell binary | Auto-detected |
+| `ROCM_PATH` | Path to ROCm installation | `/opt/rocm` |
+
+### Common Commands
+
+**Running by marker** (`-m`): Use for running groups of tests with specific labels.
+
+```bash
+# See all available markers
+pytest --markers
+
+# Run tests with a specific marker
+pytest <test-path> -m gpu
+pytest <test-path> -m "slow and gpu"
+pytest <test-path> -m "not slow"
+```
+
+**Running by keyword** (`-k`): Use for running specific test classes or methods.
+
+```bash
+# Run tests matching a keyword
+pytest <test-path> -k transpose
+pytest <test-path> -k "TestTranspose and sampling"
+pytest <test-path> -k "not binary_rewrite"
+```
+
+**Quick Start Examples:**
+
+| Mode | Command |
+|------|---------|
+| Run all tests | `pytest <test-path>` |
+| Recommended | `pytest <test-path> -n auto -v --show-output-on-subtest-fail --show-config` |
+| Standalone package | `python3 <pyz-path>` |
+
+Where `<test-path>` is `<build-dir>/share/rocprofiler-systems/tests/pytest/`
+and `<pyz-path>` is `<install-dir>/share/rocprofiler-systems/tests/rocprofsys-tests.pyz`.
+
+### Parallel Execution (pytest-xdist)
+
+Tests can be run in parallel using `pytest-xdist`:
+
+```bash
+pytest <build-dir>/share/rocprofiler-systems/tests/pytest/ -n auto  # Use all available cores
+pytest <build-dir>/share/rocprofiler-systems/tests/pytest/ -n 4     # Use 4 workers
+```
+
+> **Warning:** Running tests in parallel can cause timeouts due to resource contention, especially for `runtime_instrument` tests. If you experience unexpected timeouts, try reducing the number of workers or running sequentially.
+
+### Custom Flags
+
+| Flag | Description |
+|------|-------------|
+| `--show-config` | Show test configuration in the pytest header |
+| `--show-output` | Show runner output when tests **pass** |
+| `--show-output-on-subtest-fail` | Show runner output only when **subtests** fail |
+| `--output-dir=<path>` | Set the test output directory (default: `<build-dir>/rocprof-sys-pytest-output/` in build mode, `/tmp/$USER/rocprof-sys-pytest-output/` in install mode) |
+| `--output-log=<path>` | Write pytest output to the specified file (default: `<output_dir>/pytest-output.txt`) |
+| `--monochrome` | Disable colored output and set `ROCPROFSYS_MONOCHROME=ON` for runners |
+| `--allow-disabled` | Run tests with `@pytest.mark.disable` in CI mode (developer flag) |
+
+**Tip:** Use `--tb=short` to hide source code in tracebacks, or `--tb=no` to suppress tracebacks entirely.
+
+#### Output Display Logic
+
+The `_result_output` fixture controls when runner output is printed:
+
+| Scenario | Default | `--show-output-on-subtest-fail` | `--show-output` |
+|----------|---------|--------------------------------|-----------------|
+| Test passes | ❌ | ❌ | ✅ |
+| Subtest fails | ❌ | ✅ | ✅ |
+| Main test fails | ✅ | ✅ | ✅ |
+
+**Note:** With `--show-output`, runner output appears *before* the failure report. With `--show-output-on-subtest-fail`, it appears *after* (in the FAILURES section). This is due to how pytest processes report sections.
+
+#### Perfetto GLIBC Issue
+
+If Perfetto validation fails due to GLIBC version mismatch (this may occur on RHEL-8.x or SUSE-15.5), set `ROCPROFSYS_TRACE_PROCESSOR_PATH` to a compatible binary.
+
+```bash
+curl -L --create-dirs https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v47.0/linux-amd64/trace_processor_shell -o /tmp/$USER/trace_processor_shell
+chmod +x /tmp/$USER/trace_processor_shell
+export ROCPROFSYS_TRACE_PROCESSOR_PATH=/tmp/$USER/trace_processor_shell
+```
+
+Then run pytest with the environment variable set.
@@ -0,0 +1,643 @@
+#!/bin/bash
+#
+# Build standalone pytest executables for rocprofiler-systems tests
+#
+# This script creates packaging options:
+#   1. PyInstaller: Single binary (~50-100MB), no Python needed on target
+#   2. PyInstaller+Docker: Uses manylinux for broad glibc compatibility
+#   3. Shiv: Python zipapp (~5MB), requires Python on target
+#
+# Usage:
+#   ./build_standalone.sh [--pyinstaller] [--pyinstaller-docker] [--shiv] [--all] [--all-docker] [--output-dir DIR]
+#
+# After building, copy the output to your target machine and run:
+#   PyInstaller: ./rocprofsys-tests [pytest args...]
+#   Shiv:        python3 rocprofsys-tests.pyz [pytest args...]
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUTPUT_DIR="${SCRIPT_DIR}/dist"
+BUILD_PYINSTALLER=0
+BUILD_PYINSTALLER_DOCKER=0
+BUILD_SHIV=0
+
+# Parse arguments: each flag enables one or more packaging modes; --output-dir takes a value
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --pyinstaller)
+            BUILD_PYINSTALLER=1
+            shift
+            ;;
+        --pyinstaller-docker)
+            BUILD_PYINSTALLER_DOCKER=1
+            shift
+            ;;
+        --shiv)
+            BUILD_SHIV=1
+            shift
+            ;;
+        --all)
+            BUILD_PYINSTALLER=1
+            BUILD_SHIV=1
+            shift
+            ;;
+        --all-docker)
+            BUILD_PYINSTALLER_DOCKER=1
+            BUILD_SHIV=1
+            shift
+            ;;
+        --output-dir)
+            OUTPUT_DIR="${2:?--output-dir requires a DIR argument}"
+            shift 2
+            ;;
+        -h|--help)
+            echo "Usage: $0 [--pyinstaller] [--pyinstaller-docker] [--shiv] [--all] [--all-docker] [--output-dir DIR]"
+            echo ""
+            echo "Options:"
+            echo "  --pyinstaller        Build PyInstaller binary (uses system Python/glibc)"
+            echo "  --pyinstaller-docker Build PyInstaller binary in Docker (glibc 2.17+ compatible)"
+            echo "  --shiv               Build Shiv zipapp (requires Python on target)"
+            echo "  --all                Build pyinstaller + shiv"
+            echo "  --all-docker         Build pyinstaller-docker + shiv"
+            echo "  --output-dir         Output directory (default: ./dist)"
+            echo ""
+            echo "NOTE: If you get glibc errors on target, use --pyinstaller-docker or --shiv"
+            echo ""
+            echo "Examples:"
+            echo "  $0 --all                    # Build pyinstaller + shiv"
+            echo "  $0 --pyinstaller-docker     # Build compatible binary via Docker"
+            echo "  $0 --shiv --output-dir /tmp # Build Shiv to /tmp"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Default to shiv if nothing specified (safest option)
+if [[ $BUILD_PYINSTALLER -eq 0 && $BUILD_PYINSTALLER_DOCKER -eq 0 && $BUILD_SHIV -eq 0 ]]; then
+    BUILD_SHIV=1
+    echo "No option specified, defaulting to --shiv (most compatible)"
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+echo "=============================================="
+echo "Building rocprofiler-systems pytest packages"
+echo "=============================================="
+echo "Source directory: $SCRIPT_DIR"
+echo "Output directory: $OUTPUT_DIR"
+echo ""
+
+# Create the test runner wrapper script (written to ${SCRIPT_DIR}/run_rocprofsys_tests.py)
+create_runner_script() {
+    cat > "${SCRIPT_DIR}/run_rocprofsys_tests.py" << 'RUNNER_EOF'
+#!/usr/bin/env python3
+"""
+Standalone test runner for rocprofiler-systems pytest tests.
+
+This script is designed to be packaged with PyInstaller or Shiv to create
+a standalone executable for running tests on machines with rocprofiler-systems
+installed.
+
+Usage:
+    ./rocprofsys-tests [pytest options...]
+
+Examples:
+    ./rocprofsys-tests                           # Run all tests
+    ./rocprofsys-tests -v                        # Verbose output
+    ./rocprofsys-tests -k transpose              # Run only transpose tests
+    ./rocprofsys-tests --collect-only            # List available tests
+    ./rocprofsys-tests test_transpose.py         # Run specific test file
+
+Environment Variables:
+    ROCPROFSYS_INSTALL_DIR          - Path to rocprofiler-systems installation
+    ROCPROFSYS_BUILD_DIR            - Path to build directory (for development)
+    ROCPROFSYS_SOURCE_DIR           - Path to source directory (for development)
+    ROCPROFSYS_KEEP_TEST_OUTPUT     - Keep test output on success (ON/OFF, default: ON)
+    ROCPROFSYS_USE_ROCPD            - Enable/disable ROCpd validation (ON/OFF, default: ON if available)
+    ROCPROFSYS_VALIDATE_PERFETTO    - Enable/disable Perfetto validation (ON/OFF, default: ON if available)
+    ROCPROFSYS_TRACE_PROC_SHELL     - Path to trace_processor_shell binary (auto-detected)
+    ROCM_PATH                       - Path to ROCm installation (default: /opt/rocm)
+    ROCM_LLVM_OBJDUMP               - Path to ROCm's llvm-objdump (default: auto-detected)
+"""
+import os
+import sys
+
+def get_test_dir():
+    """Find the tests directory - handles both packaged and development modes."""
+    # When packaged with PyInstaller, files are extracted to _MEIPASS
+    if getattr(sys, 'frozen', False):
+        base_path = sys._MEIPASS
+        test_dir = os.path.join(base_path, 'tests', 'pytest')
+        if os.path.isdir(test_dir):
+            return test_dir
+        # Fallback: tests might be at root level
+        test_dir = os.path.join(base_path, 'pytest')
+        if os.path.isdir(test_dir):
+            return test_dir
+        return base_path
+    else:
+        # Running as regular Python script
+        return os.path.dirname(os.path.abspath(__file__))
+
+def main():
+    import pytest
+
+    test_dir = get_test_dir()
+
+    # Add test directory to path so imports work
+    if test_dir not in sys.path:
+        sys.path.insert(0, test_dir)
+
+    # Build pytest arguments
+    args = list(sys.argv[1:])
+
+    # If no test path specified, use the test directory
+    has_test_path = any(
+        arg.endswith('.py') or
+        os.path.isdir(arg) or
+        '::' in arg
+        for arg in args if not arg.startswith('-')
+    )
+
+    if not has_test_path:
+        args.append(test_dir)
+
+    # Print info
+    print(f"rocprofiler-systems pytest runner")
+    print(f"Test directory: {test_dir}")
+    print(f"Arguments: {' '.join(args)}")
+    print("-" * 50)
+
+    # Run pytest
+    return pytest.main(args)
+
+if __name__ == "__main__":
+    sys.exit(main())
+RUNNER_EOF
+    echo "Created: run_rocprofsys_tests.py"
+}
+
+# Build a self-contained test binary with PyInstaller on the host.
+#
+# Reads SCRIPT_DIR (test sources) and OUTPUT_DIR (destination of the binary).
+# A temporary spec file and a PyInstaller work directory are created under
+# SCRIPT_DIR and removed again after the build.
+# NOTE: this function changes the working directory to SCRIPT_DIR and does not
+# restore it.
+build_pyinstaller() {
+    echo ""
+    echo "=== Building PyInstaller standalone binary ==="
+    echo ""
+
+    # Install through "python3 -m pip" so the packages land in the very
+    # interpreter that is probed here and that runs PyInstaller below; a bare
+    # "pip" found on PATH may belong to a different Python installation.
+    if ! python3 -c "import PyInstaller" 2>/dev/null; then
+        echo "Installing PyInstaller..."
+        python3 -m pip install pyinstaller
+    fi
+
+    # Install pytest plugins needed for bundling
+    echo "Installing pytest and required plugins..."
+    python3 -m pip install pytest pytest-subtests pytest-timeout pytest-xdist
+
+    # Create spec file for more control. The heredoc delimiter is deliberately
+    # unquoted so that ${SCRIPT_DIR} is expanded into the generated spec.
+    cat > "${SCRIPT_DIR}/rocprofsys_tests.spec" << SPEC_EOF
+# -*- mode: python ; coding: utf-8 -*-
+import os
+
+block_cipher = None
+
+# Collect all test files and the rocprofsys package
+test_dir = '${SCRIPT_DIR}'
+datas = []
+
+# Add all Python files from the test directory
+for root, dirs, files in os.walk(test_dir):
+    # Skip __pycache__ and build directories
+    dirs[:] = [d for d in dirs if d not in ('__pycache__', 'dist', 'build')]
+    for f in files:
+        if f.endswith(('.py', '.txt', '.md', '.json')):
+            src = os.path.join(root, f)
+            # Compute relative destination
+            rel_path = os.path.relpath(root, test_dir)
+            if rel_path == '.':
+                dst = 'tests/pytest'
+            else:
+                dst = os.path.join('tests/pytest', rel_path)
+            datas.append((src, dst))
+
+a = Analysis(
+    ['${SCRIPT_DIR}/run_rocprofsys_tests.py'],
+    pathex=['${SCRIPT_DIR}'],
+    binaries=[],
+    datas=datas,
+    hiddenimports=[
+        'pytest',
+        '_pytest',
+        '_pytest.assertion',
+        '_pytest.config',
+        '_pytest.fixtures',
+        '_pytest.python',
+        'pytest_subtests',
+        'pytest_subtests.plugin',
+        'pytest_timeout',
+        'xdist',
+        'rocprofsys',
+        'rocprofsys.config',
+        'rocprofsys.runners',
+        'rocprofsys.validators',
+        'rocprofsys.gpu',
+    ],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    [],
+    name='rocprofsys-tests',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
+SPEC_EOF
+
+    # Build with PyInstaller (the spec file is referenced relative to SCRIPT_DIR)
+    cd "$SCRIPT_DIR"
+    python3 -m PyInstaller \
+        --distpath "$OUTPUT_DIR" \
+        --workpath "${SCRIPT_DIR}/build/pyinstaller" \
+        --clean \
+        --noconfirm \
+        rocprofsys_tests.spec
+
+    # Cleanup: remove the generated spec and the intermediate work directory
+    rm -f "${SCRIPT_DIR}/rocprofsys_tests.spec"
+    rm -rf "${SCRIPT_DIR}/build/pyinstaller"
+
+    echo ""
+    echo "PyInstaller build complete!"
+    echo "Binary: ${OUTPUT_DIR}/rocprofsys-tests"
+    echo "Size: $(du -h "${OUTPUT_DIR}/rocprofsys-tests" | cut -f1)"
+}
+
+# Build the PyInstaller binary inside a manylinux2014 container so that the
+# result links against an old glibc.
+#
+# Reads SCRIPT_DIR and OUTPUT_DIR. Creates a temporary Docker build context
+# (removed by an EXIT trap), builds the image "rocprofsys-tests-builder",
+# copies the binary out of a throw-away container and removes the image.
+# Exits the script with status 1 when docker is not available.
+build_pyinstaller_docker() {
+    echo ""
+    echo "=== Building PyInstaller binary in Docker (manylinux) ==="
+    echo ""
+
+    # Check if Docker is available
+    if ! command -v docker &> /dev/null; then
+        echo "ERROR: Docker is not installed or not in PATH"
+        echo "Install Docker or use --shiv instead"
+        exit 1
+    fi
+
+    # Create a temporary build context. The trap string is double-quoted, so
+    # the directory name is baked in now and the trap fires on script exit.
+    BUILD_CONTEXT=$(mktemp -d)
+    trap "rm -rf \"$BUILD_CONTEXT\"" EXIT
+
+    # Copy test files to build context
+    cp -r "${SCRIPT_DIR}" "${BUILD_CONTEXT}/pytest"
+
+    # Make sure the runner script exists, then copy it exactly once.
+    # (The previous "cp ... || create_runner_script && cp ..." chain grouped as
+    # "(cp || create) && cp", so it always copied twice and hid cp errors.)
+    if [[ ! -f "${SCRIPT_DIR}/run_rocprofsys_tests.py" ]]; then
+        create_runner_script
+    fi
+    cp "${SCRIPT_DIR}/run_rocprofsys_tests.py" "${BUILD_CONTEXT}/"
+
+    # Create Dockerfile. The delimiter is quoted: nothing below is expanded by
+    # this shell. NOTE(review): the "RUN cat << 'SPEC_EOF'" heredoc inside the
+    # Dockerfile needs a BuildKit-enabled docker build - confirm on CI hosts.
+    cat > "${BUILD_CONTEXT}/Dockerfile" << 'DOCKERFILE_EOF'
+# Use manylinux for broad glibc compatibility (glibc 2.17+)
+FROM quay.io/pypa/manylinux2014_x86_64
+
+# Install Python and pip
+RUN /opt/python/cp310-cp310/bin/python -m pip install --upgrade pip
+RUN /opt/python/cp310-cp310/bin/python -m pip install pyinstaller pytest pytest-subtests pytest-timeout pytest-xdist
+
+# Set Python path
+ENV PATH="/opt/python/cp310-cp310/bin:$PATH"
+
+WORKDIR /build
+
+# Copy test files
+COPY pytest /build/pytest
+COPY run_rocprofsys_tests.py /build/
+
+# Create spec file
+RUN cat > /build/rocprofsys_tests.spec << 'SPEC_EOF'
+# -*- mode: python ; coding: utf-8 -*-
+import os
+
+block_cipher = None
+
+test_dir = '/build/pytest'
+datas = []
+
+for root, dirs, files in os.walk(test_dir):
+    dirs[:] = [d for d in dirs if d not in ('__pycache__', 'dist', 'build')]
+    for f in files:
+        if f.endswith(('.py', '.txt', '.md', '.json')):
+            src = os.path.join(root, f)
+            rel_path = os.path.relpath(root, test_dir)
+            if rel_path == '.':
+                dst = 'tests/pytest'
+            else:
+                dst = os.path.join('tests/pytest', rel_path)
+            datas.append((src, dst))
+
+a = Analysis(
+    ['/build/run_rocprofsys_tests.py'],
+    pathex=['/build/pytest'],
+    binaries=[],
+    datas=datas,
+    hiddenimports=[
+        'pytest', '_pytest', '_pytest.assertion', '_pytest.config',
+        '_pytest.fixtures', '_pytest.python',
+        'pytest_subtests', 'pytest_subtests.plugin', 'pytest_timeout', 'xdist',
+        'rocprofsys', 'rocprofsys.config', 'rocprofsys.runners',
+        'rocprofsys.validators', 'rocprofsys.gpu',
+    ],
+    hookspath=[],
+    runtime_hooks=[],
+    excludes=[],
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz, a.scripts, a.binaries, a.zipfiles, a.datas, [],
+    name='rocprofsys-tests',
+    debug=False,
+    strip=True,
+    upx=False,
+    console=True,
+)
+SPEC_EOF
+
+# Build
+RUN pyinstaller --clean --noconfirm rocprofsys_tests.spec
+
+# Output is in /build/dist/rocprofsys-tests
+DOCKERFILE_EOF
+
+    echo "Building Docker image..."
+    docker build -t rocprofsys-tests-builder "${BUILD_CONTEXT}"
+
+    # "docker create" yields a stopped container whose filesystem can be read
+    # with "docker cp" without ever running it.
+    echo "Extracting binary from container..."
+    CONTAINER_ID=$(docker create rocprofsys-tests-builder)
+    docker cp "${CONTAINER_ID}:/build/dist/rocprofsys-tests" "${OUTPUT_DIR}/rocprofsys-tests"
+    docker rm "${CONTAINER_ID}"
+
+    # Cleanup (best effort: a failure to remove the image is not fatal)
+    docker rmi rocprofsys-tests-builder 2>/dev/null || true
+
+    echo ""
+    echo "PyInstaller (Docker/manylinux) build complete!"
+    echo "Binary: ${OUTPUT_DIR}/rocprofsys-tests"
+    echo "Size: $(du -h "${OUTPUT_DIR}/rocprofsys-tests" | cut -f1)"
+    echo ""
+    echo "This binary is compatible with glibc 2.17+ (RHEL 7, Ubuntu 14.04+, etc.)"
+}
+
+# Build a plain Python zipapp (requires pytest on the target, but has no
+# glibc constraints).
+#
+# Reads SCRIPT_DIR and OUTPUT_DIR. Stages the rocprofsys helper package, the
+# test_*.py files, conftest.py and a generated __main__.py in a temporary
+# directory, archives that directory into ${OUTPUT_DIR}/rocprofsys-tests.pyz
+# and removes the staging directory afterwards.
+build_shiv() {
+    echo ""
+    echo "=== Building Python zipapp ==="
+    echo ""
+
+    # Create a temporary directory
+    BUILD_DIR=$(mktemp -d)
+    echo "Build directory: $BUILD_DIR"
+
+    # Create the package structure
+    mkdir -p "${BUILD_DIR}/rocprofsys"
+    mkdir -p "${BUILD_DIR}/tests"
+
+    # Copy the rocprofsys test framework package
+    cp -r "${SCRIPT_DIR}/rocprofsys/"* "${BUILD_DIR}/rocprofsys/"
+
+    # Copy test files (best effort: a missing test_*.py glob is tolerated)
+    cp "${SCRIPT_DIR}"/test_*.py "${BUILD_DIR}/tests/" 2>/dev/null || true
+    cp "${SCRIPT_DIR}/conftest.py" "${BUILD_DIR}/tests/"
+
+    # Ensure __init__.py exists
+    touch "${BUILD_DIR}/rocprofsys/__init__.py"
+    touch "${BUILD_DIR}/tests/__init__.py"
+
+    # Create __main__.py (entry point for zipapp). Quoted delimiter: the Python
+    # source below is written out verbatim.
+    cat > "${BUILD_DIR}/__main__.py" << 'MAIN_EOF'
+#!/usr/bin/env python3
+"""
+rocprofiler-systems pytest runner - zipapp version
+
+Usage:
+    python3 rocprofsys-tests.pyz [pytest options...]
+
+Requirements:
+    - pytest must be installed: pip install pytest
+    - rocprofiler-systems must be installed on the system
+
+Examples:
+    python3 rocprofsys-tests.pyz --collect-only
+    python3 rocprofsys-tests.pyz -v
+    python3 rocprofsys-tests.pyz -k transpose -v
+"""
+import os
+import sys
+import zipfile
+import tempfile
+import shutil
+import atexit
+
+# Global for cleanup
+_extract_dir = None
+
+def cleanup():
+    """Remove extracted files on exit."""
+    global _extract_dir
+    if _extract_dir and os.path.isdir(_extract_dir):
+        shutil.rmtree(_extract_dir, ignore_errors=True)
+
+def extract_tests():
+    """Extract tests from zipapp to temp directory."""
+    global _extract_dir
+
+    # Find the zipapp path
+    # When running as zipapp, __file__ points inside the zip
+    # The zip path is everything before the first component after .pyz
+    zipapp_path = None
+    for path in sys.path:
+        if path.endswith('.pyz') and os.path.isfile(path):
+            zipapp_path = path
+            break
+
+    if not zipapp_path:
+        # Try to find it from __file__
+        current = os.path.abspath(__file__)
+        while current and not current.endswith('.pyz'):
+            parent = os.path.dirname(current)
+            if parent == current:
+                break
+            current = parent
+        if current.endswith('.pyz'):
+            zipapp_path = current
+
+    if not zipapp_path or not os.path.isfile(zipapp_path):
+        # Not running from zipapp, use local directory
+        return os.path.dirname(os.path.abspath(__file__))
+
+    # Create temp directory and extract
+    _extract_dir = tempfile.mkdtemp(prefix='rocprofsys-tests-')
+    atexit.register(cleanup)
+
+    with zipfile.ZipFile(zipapp_path, 'r') as zf:
+        zf.extractall(_extract_dir)
+
+    return _extract_dir
+
+def main():
+    # Check pytest is available
+    try:
+        import pytest
+    except ImportError:
+        print("ERROR: pytest is not installed")
+        print("Please install it: pip install pytest")
+        sys.exit(1)
+
+    # Extract tests to temp directory
+    app_path = extract_tests()
+
+    # Add app path to sys.path for imports
+    if app_path not in sys.path:
+        sys.path.insert(0, app_path)
+
+    # Find tests directory
+    tests_dir = os.path.join(app_path, 'tests')
+
+    if not os.path.isdir(tests_dir):
+        print(f"ERROR: Tests directory not found: {tests_dir}")
+        print(f"Contents of {app_path}:")
+        for item in os.listdir(app_path):
+            print(f"  {item}")
+        sys.exit(1)
+
+    # Build pytest arguments
+    args = list(sys.argv[1:])
+
+    # Check if user specified a test path
+    has_test_path = any(
+        (arg.endswith('.py') or os.path.isdir(arg) or '::' in arg)
+        for arg in args if not arg.startswith('-')
+    )
+
+    if not has_test_path:
+        args.append(tests_dir)
+
+    # Print info
+    print("=" * 60)
+    print("rocprofiler-systems pytest runner")
+    print("=" * 60)
+    print(f"Tests dir: {tests_dir}")
+    print(f"Command: pytest {' '.join(args)}")
+    print("=" * 60)
+    print()
+
+    # Run pytest
+    return pytest.main(args)
+
+if __name__ == "__main__":
+    sys.exit(main())
+MAIN_EOF
+
+    # Create the zipapp (don't use --main since we have __main__.py).
+    # The staging directory is passed as the source instead of cd-ing into it:
+    # the directory is deleted right below, and a prior "cd" would leave the
+    # rest of the script running from a working directory that no longer exists.
+    python3 -m zipapp \
+        --python "/usr/bin/env python3" \
+        --output "${OUTPUT_DIR}/rocprofsys-tests.pyz" \
+        --compress \
+        "$BUILD_DIR"
+
+    # Make it executable
+    chmod +x "${OUTPUT_DIR}/rocprofsys-tests.pyz"
+
+    # Cleanup
+    rm -rf "$BUILD_DIR"
+
+    echo ""
+    echo "Zipapp build complete!"
+    echo "Output: ${OUTPUT_DIR}/rocprofsys-tests.pyz"
+    echo "Size: $(du -h "${OUTPUT_DIR}/rocprofsys-tests.pyz" | cut -f1)"
+    echo ""
+    echo "Requirements on target machine:"
+    echo "  - Python 3.8+"
+    echo "  - Install dependencies: pip install pytest pytest-subtests pytest-timeout pytest-xdist"
+}
+
+# Entry point: generate the runner once, then build each requested artifact.
+create_runner_script
+
+if [[ $BUILD_PYINSTALLER -eq 1 ]]; then
+    build_pyinstaller
+fi
+
+if [[ $BUILD_PYINSTALLER_DOCKER -eq 1 ]]; then
+    build_pyinstaller_docker
+fi
+
+if [[ $BUILD_SHIV -eq 1 ]]; then
+    build_shiv
+fi
+
+# The generated runner is only an intermediate file; drop it again.
+rm -f "${SCRIPT_DIR}/run_rocprofsys_tests.py"
+
+# Summary banner (unquoted delimiter: $OUTPUT_DIR is expanded).
+cat << SUMMARY_EOF
+
+==============================================
+Build complete!
+==============================================
+
+Output files in: $OUTPUT_DIR
+SUMMARY_EOF
+ls -lh "$OUTPUT_DIR" 2>/dev/null || echo "(no files yet)"
+
+# Usage instructions (quoted delimiters: text is printed verbatim).
+cat << 'USAGE_EOF'
+
+=== How to use on target machine ===
+
+1. Copy the binary/zipapp to your target machine
+
+2. Ensure rocprofiler-systems is installed and in PATH, or set:
+   export ROCPROFSYS_INSTALL_DIR=/opt/rocm
+
+3. Run tests:
+USAGE_EOF
+if [[ $BUILD_PYINSTALLER -eq 1 || $BUILD_PYINSTALLER_DOCKER -eq 1 ]]; then
+    echo "   PyInstaller: ./rocprofsys-tests -v"
+fi
+if [[ $BUILD_SHIV -eq 1 ]]; then
+    echo "   Shiv:        python3 rocprofsys-tests.pyz -v"
+    echo "   (Requires: pip install pytest pytest-subtests pytest-timeout pytest-xdist)"
+fi
+cat << 'OPTIONS_EOF'
+
+4. Common pytest options:
+   -v                  Verbose output
+   -k 'transpose'      Run only tests matching 'transpose'
+   --collect-only      List available tests
+   -x                  Stop on first failure
+
+OPTIONS_EOF
@@ -0,0 +1,74 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+rocprofsys testing utilities package.
+
+Provides reusable components for testing rocprofiler-systems:
+- Test runners (sampling, binary rewrite, runtime instrument)
+- Output validators (perfetto, rocpd, timemory, regex patterns)
+- Configuration management
+- GPU and system detection utilities
+"""
+
+from .config import (
+    RocprofsysConfig,
+    discover_install_config,
+    discover_build_config,
+)
+
+from .runners import (
+    TestResult,
+    BaselineRunner,
+    SamplingRunner,
+    BinaryRewriteRunner,
+    RuntimeInstrumentRunner,
+    SysRunRunner,
+)
+from .validators import (
+    ValidationResult,
+    validate_perfetto_trace,
+    validate_rocpd_database,
+    validate_timemory_json,
+    validate_causal_json,
+    validate_file_exists,
+    validate_regex,
+)
+
+from .gpu import (
+    GPUInfo,
+    get_rocminfo,
+    detect_gpu,
+    lookup_gpu_category,
+    get_target_gpu_arch,
+    get_offload_extractor,
+)
+
+__all__ = [
+    # Config
+    "RocprofsysConfig",
+    "discover_build_config",
+    "discover_install_config",
+    # Runners
+    "TestResult",
+    "BaselineRunner",
+    "SamplingRunner",
+    "BinaryRewriteRunner",
+    "RuntimeInstrumentRunner",
+    "SysRunRunner",
+    # Validators
+    "ValidationResult",
+    "validate_perfetto_trace",
+    "validate_rocpd_database",
+    "validate_timemory_json",
+    "validate_causal_json",
+    "validate_file_exists",
+    "validate_regex",
+    # GPU
+    "GPUInfo",
+    "get_rocminfo",
+    "detect_gpu",
+    "lookup_gpu_category",
+    "get_target_gpu_arch",
+    "get_offload_extractor",
+]
@@ -0,0 +1,505 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+from __future__ import annotations
+from dataclasses import dataclass
+import getpass
+import os
+from pathlib import Path
+import shutil
+import tempfile
+from typing import Optional
+import re
+
+
+@dataclass
+class RocprofsysConfig:
+    """Configuration for rocprofiler-systems test execution.
+
+    Holds the paths needed to run the tests against either a build tree or an
+    installed package.
+
+    Attributes:
+        rocprofsys_build_dir: Path to either the build or install directory
+        rocprofsys_instrument: Path to rocprof-sys-instrument executable
+        rocprofsys_run: Path to rocprof-sys-run executable
+        rocprofsys_sample: Path to rocprof-sys-sample executable
+        rocprofsys_causal: Path to rocprof-sys-causal executable
+        rocprofsys_avail: Path to rocprof-sys-avail executable
+        rocm_path: Path to ROCm installation directory (None when not found)
+        rocprofsys_lib_dir: Path to rocprofsys library directory
+        rocprofsys_bin_dir: Path to rocprofsys binary directory
+        rocprofsys_examples_dir:
+            In build mode, this is the root of the build directory.
+            In install mode, this is the examples/ directory.
+        rocprofsys_tests_dir: Path to rocprofsys tests directory
+        rocpd_validation_rules: Path to rocprofiler-systems rocpd validation
+            rules directory
+        test_output_dir: Path to test output directory
+        mpiexec: Path to MPI launcher executable (None when not found)
+        is_installed: Whether this is an installed configuration
+        rocm_version: (major, minor, patch) of the detected ROCm, or None
+    """
+
+    rocprofsys_build_dir: Path
+    rocprofsys_instrument: Path
+    rocprofsys_run: Path
+    rocprofsys_sample: Path
+    rocprofsys_causal: Path
+    rocprofsys_avail: Path
+    rocm_path: Optional[Path]
+    rocprofsys_lib_dir: Path
+    rocprofsys_bin_dir: Path
+    rocprofsys_examples_dir: Path
+    rocprofsys_tests_dir: Path
+    rocpd_validation_rules: Path
+    test_output_dir: Path
+    mpiexec: Optional[Path]
+    is_installed: bool = False
+    rocm_version: Optional[tuple[int, int, int]] = None
+
+    def get_llvm_lib_paths(self) -> list[Path]:
+        """Get list of found ROCm LLVM lib paths.
+
+        Returns:
+            The candidate LLVM lib directories under rocm_path that exist, in
+            probing order; an empty list when rocm_path is unset or none exist.
+        """
+        if not self.rocm_path:
+            return []
+        # Match discover_llvm_libdir_for_ompt() logic
+        candidates = (
+            self.rocm_path / "llvm" / "lib",
+            self.rocm_path / "lib" / "llvm" / "lib",
+        )
+        return [candidate for candidate in candidates if candidate.exists()]
+
+    def get_library_path(self) -> str:
+        """Get LD_LIBRARY_PATH including rocprofiler-systems libraries.
+
+        The resolved rocprofsys library directory comes first, followed by the
+        current LD_LIBRARY_PATH (if non-empty) and finally any ROCm LLVM lib
+        directories as a fallback.
+
+        Returns:
+            Colon-separated LD_LIBRARY_PATH string
+        """
+        entries = [str(self.rocprofsys_lib_dir.resolve())]
+
+        inherited = os.environ.get("LD_LIBRARY_PATH", "")
+        if inherited:
+            entries.append(inherited)
+
+        # Add ROCm LLVM lib as fallback
+        entries.extend(str(llvm_dir) for llvm_dir in self.get_llvm_lib_paths())
+
+        return ":".join(entries)
+
+    def get_target_executable(self, name: str) -> Path:
+        """Get path to a test target executable.
+
+        When is_installed is True, searches in the following order:
+        1. rocprofsys_examples_dir/name
+        2. rocprofsys_bin_dir/name
+        3. PATH lookup
+
+        When is_installed is False (build directory layout), searches in the
+        following order:
+        1. rocprofsys_examples_dir/name
+        2. rocprofsys_examples_dir/examples/name/name
+        3. rocprofsys_examples_dir/examples/rccl/name
+        4. rocprofsys_bin_dir/name
+        5. PATH lookup
+
+        Args:
+            name: Name of the target executable
+
+        Returns:
+            Path to the executable
+
+        Raises:
+            FileNotFoundError: If the executable is not found; the message
+                lists every location that was checked
+        """
+        examples_dir = self.rocprofsys_examples_dir
+        if self.is_installed:
+            candidates = [examples_dir / name]
+        else:
+            candidates = [
+                examples_dir / name,
+                examples_dir / "examples" / name / name,
+                # rccl tests lie in their own directory
+                examples_dir / "examples" / "rccl" / name,
+            ]
+        candidates.append(self.rocprofsys_bin_dir / name)
+
+        for candidate in candidates:
+            # is_file() is False for both missing paths and directories
+            if candidate.is_file():
+                return candidate
+
+        # PATH lookup via shutil.which
+        on_path = shutil.which(name)
+        if on_path:
+            return Path(on_path)
+
+        searched = "\n".join(f"  - {candidate}" for candidate in candidates)
+        raise FileNotFoundError(
+            f"Target executable '{name}' not found. Searched in:\n"
+            f"{searched}\n"
+            f"  - PATH"
+        )
+
+    def get_fundamental_environment(self) -> dict[str, str]:
+        """Get fundamental environment variables inherited from parent process.
+
+        Returns:
+            Mapping of PATH, HOME, USER, SHELL, TERM and LANG to their current
+            values; variables that are unset map to an empty string.
+        """
+        inherited = ("PATH", "HOME", "USER", "SHELL", "TERM", "LANG")
+        return {key: os.environ.get(key, "") for key in inherited}
+
+    def get_base_environment(self) -> dict[str, str]:
+        """Get base environment variables for test execution.
+
+        Returns:
+            A new dict on every call; LD_LIBRARY_PATH is computed through
+            get_library_path(), all other values are fixed defaults.
+        """
+        return {
+            "ROCPROFSYS_CI": "ON",
+            "ROCPROFSYS_CONFIG_FILE": "",
+            "ROCPROFSYS_TRACE": "ON",
+            "ROCPROFSYS_PROFILE": "ON",
+            "ROCPROFSYS_USE_SAMPLING": "ON",
+            "ROCPROFSYS_USE_PROCESS_SAMPLING": "ON",
+            "ROCPROFSYS_TIME_OUTPUT": "OFF",
+            "ROCPROFSYS_FILE_OUTPUT": "ON",
+            "ROCPROFSYS_USE_PID": "OFF",
+            "ROCPROFSYS_VERBOSE": "1",
+            "ROCPROFSYS_SAMPLING_FREQ": "300",
+            "ROCPROFSYS_SAMPLING_DELAY": "0.05",
+            "OMP_PROC_BIND": "spread",
+            "OMP_PLACES": "threads",
+            "OMP_NUM_THREADS": "2",
+            "LD_LIBRARY_PATH": self.get_library_path(),
+        }
+
+    def get_base_binary_environment(self) -> dict[str, str]:
+        """Get base environment variables for rocprof-sys binary test execution.
+
+        Returns:
+            A new dict on every call; LD_LIBRARY_PATH is computed through
+            get_library_path(), all other values are fixed defaults.
+        """
+        return {
+            "ROCPROFSYS_TRACE": "ON",
+            "ROCPROFSYS_PROFILE": "ON",
+            "ROCPROFSYS_USE_SAMPLING": "ON",
+            "ROCPROFSYS_TIME_OUTPUT": "OFF",
+            "LD_LIBRARY_PATH": self.get_library_path(),
+            "ROCPROFSYS_CI": "ON",
+            "ROCPROFSYS_CI_TIMEOUT": "300",
+            "ROCPROFSYS_CONFIG_FILE": "",
+        }
+
+
+def _find_rocm_path() -> Optional[Path]:
+    """Locate the ROCm installation.
+
+    Probes $ROCM_PATH (when set and non-empty), then /opt/rocm, then
+    /usr/local/rocm.
+
+    Returns:
+        The resolved path of the first location that exists, or None.
+    """
+    locations = (os.environ.get("ROCM_PATH"), "/opt/rocm", "/usr/local/rocm")
+    for location in locations:
+        if not location:
+            # ROCM_PATH unset or empty
+            continue
+        root = Path(location)
+        if root.exists():
+            return root.resolve()
+    return None
+
+
+def _get_rocm_version() -> Optional[tuple[int, int, int]]:
+    """Read the installed ROCm version.
+
+    Looks for <rocm>/.info/version first and <rocm>/share/rocm/version second,
+    and parses a leading "major.minor.patch" from the file content.
+
+    Returns:
+        (major, minor, patch), or None when ROCm is not found, no version file
+        exists, the file cannot be read, or its content does not match.
+    """
+    rocm_root = _find_rocm_path()
+    if not rocm_root:
+        return None
+
+    primary = rocm_root / ".info" / "version"
+    fallback = rocm_root / "share" / "rocm" / "version"
+    version_file = primary if primary.exists() else fallback
+    if not version_file.exists():
+        return None
+
+    try:
+        content = version_file.read_text().strip()
+        parsed = re.match(r"(\d+)\.(\d+)\.(\d+)", content)
+        if parsed:
+            major, minor, patch = (int(part) for part in parsed.groups())
+            return (major, minor, patch)
+    except (OSError, ValueError):
+        # Unreadable or undecodable version file: treat as "unknown"
+        pass
+
+    return None
+
+
+def _find_mpiexec() -> Optional[Path]:
+    """Locate an MPI launcher on PATH.
+
+    Returns:
+        Path to "mpiexec" if found, otherwise to "mpirun", otherwise None.
+    """
+    # Lazy generator: "mpirun" is only looked up when "mpiexec" is missing.
+    hits = (shutil.which(launcher) for launcher in ("mpiexec", "mpirun"))
+    located = next((hit for hit in hits if hit), None)
+    return Path(located) if located else None
+
+
+def _find_executable(name: str, search_paths: list[Path]) -> Optional[Path]:
+    """Look up an executable in the given directories, then on PATH.
+
+    Args:
+        name: File name of the executable
+        search_paths: Directories to check, in order, before PATH
+
+    Returns:
+        The resolved path of the first regular file found in search_paths,
+        else the shutil.which() hit as a Path, else None.
+    """
+    for directory in search_paths:
+        candidate = directory / name
+        # is_file() is False for both missing paths and directories
+        if candidate.is_file():
+            return candidate.resolve()
+
+    # Fallback to PATH
+    on_path = shutil.which(name)
+    return Path(on_path) if on_path else None
+
+
+def discover_install_config(
+    install_dir: Optional[Path] = None,
+    output_dir: Optional[Path] = None,
+) -> RocprofsysConfig:
+    """Discover rocprofiler-systems installation configuration.
+
+    Creates configuration for testing against installed binaries.
+
+    Args:
+        install_dir: Installation prefix (e.g., /opt/rocm or /usr/local). When
+            omitted, ROCPROFSYS_INSTALL_DIR is used if set; otherwise the ROCm
+            path, /usr/local, /usr and /opt/rocprofiler-systems are probed for
+            share/rocprofiler-systems/tests and .../examples.
+        output_dir: Directory for test outputs. When omitted, defaults to
+            <tempdir>/<username>/rocprof-sys-pytest-output. The directory is
+            created if it does not exist.
+
+    Returns:
+        RocprofsysConfig configured for installed binaries
+
+    Raises:
+        FileNotFoundError: If no installation can be found, or if any of the
+            required rocprof-sys-* executables is missing
+    """
+
+    if install_dir is None:
+        env_install = os.environ.get("ROCPROFSYS_INSTALL_DIR")
+        if env_install:
+            install_dir = Path(env_install)
+        else:
+            for candidate in [
+                _find_rocm_path(),
+                Path("/usr/local"),
+                Path("/usr"),
+                Path(
+                    "/opt/rocprofiler-systems"
+                ),  # Standard install location from README.md
+            ]:
+                if candidate is None:
+                    continue
+                share_dir = candidate / "share" / "rocprofiler-systems"
+                if (share_dir / "tests").is_dir() and (share_dir / "examples").is_dir():
+                    install_dir = candidate
+                    break
+
+    if install_dir is None:
+        # Each fragment ends with a space so the sentences do not run together.
+        raise FileNotFoundError(
+            "Could not find a suitable rocprofiler-systems installation. "
+            "Set ROCPROFSYS_INSTALL_DIR environment variable. "
+            "A suitable installation is one that has the following directories: "
+            "share/rocprofiler-systems/examples and share/rocprofiler-systems/tests"
+        )
+
+    # Path() makes a string prefix passed by a caller work as well
+    install_dir = Path(install_dir).resolve()
+
+    # Determine directory layout
+    bin_dir = install_dir / "bin"
+    lib_dir = install_dir / "lib"
+
+    # For lib64 systems
+    if not lib_dir.exists() and (install_dir / "lib64").exists():
+        lib_dir = install_dir / "lib64"
+
+    examples_dir = install_dir / "share" / "rocprofiler-systems" / "examples"
+    tests_dir = install_dir / "share" / "rocprofiler-systems" / "tests"
+    rocpd_validation_rules = tests_dir / "rocpd-validation-rules"
+
+    if output_dir is None:
+        # Per-user default under the temp directory; the user name is only
+        # needed (and therefore only looked up) in this branch.
+        try:
+            username = getpass.getuser()
+        except Exception:
+            username = str(os.getuid())
+        output_dir = Path(tempfile.gettempdir()) / username / "rocprof-sys-pytest-output"
+    else:
+        output_dir = Path(output_dir)
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    rocm_path = _find_rocm_path()
+    mpiexec = _find_mpiexec()
+
+    search_paths = [bin_dir]
+    rocprof_instrument = _find_executable("rocprof-sys-instrument", search_paths)
+    rocprof_sample = _find_executable("rocprof-sys-sample", search_paths)
+    rocprof_run = _find_executable("rocprof-sys-run", search_paths)
+    rocprof_causal = _find_executable("rocprof-sys-causal", search_paths)
+    rocprof_avail = _find_executable("rocprof-sys-avail", search_paths)
+
+    # If any of the executables are not found, raise an error
+    required_executables = {
+        "rocprof-sys-instrument": rocprof_instrument,
+        "rocprof-sys-sample": rocprof_sample,
+        "rocprof-sys-run": rocprof_run,
+        "rocprof-sys-causal": rocprof_causal,
+        "rocprof-sys-avail": rocprof_avail,
+    }
+
+    missing = [name for name, path in required_executables.items() if path is None]
+    if missing:
+        raise FileNotFoundError(
+            f"Required executables not found: {', '.join(missing)}. "
+            f"Searched in: {search_paths}"
+        )
+
+    return RocprofsysConfig(
+        rocprofsys_build_dir=install_dir,
+        rocprofsys_instrument=rocprof_instrument,
+        rocprofsys_run=rocprof_run,
+        rocprofsys_sample=rocprof_sample,
+        rocprofsys_causal=rocprof_causal,
+        rocprofsys_avail=rocprof_avail,
+        rocm_path=rocm_path,
+        rocprofsys_lib_dir=lib_dir,
+        rocprofsys_bin_dir=bin_dir,
+        rocprofsys_examples_dir=examples_dir,
+        rocprofsys_tests_dir=tests_dir,
+        rocpd_validation_rules=rocpd_validation_rules,
+        test_output_dir=output_dir,
+        mpiexec=mpiexec,
+        rocm_version=_get_rocm_version(),
+        is_installed=True,
+    )
+
+
+def discover_build_config(
+    build_dir: Optional[Path] = None,
+    output_dir: Optional[Path] = None,
+) -> RocprofsysConfig:
+    """Discover rocprofiler-systems build configuration.
+
+    Resolution order:
+    1. If ROCPROFSYS_INSTALL_DIR is set, delegate to discover_install_config().
+    2. If this file lives under the system temp directory (the zipapp extracts
+       itself there), delegate to discover_install_config().
+    3. Otherwise use build_dir, then ROCPROFSYS_BUILD_DIR, then the directory
+       six levels above this file.
+
+    Args:
+        build_dir: Explicit build directory path
+        output_dir: Directory for test outputs. When omitted in build mode,
+            defaults to <build_dir>/rocprof-sys-pytest-output. It is forwarded
+            unchanged whenever discover_install_config() is used instead.
+
+    Returns:
+        RocprofsysConfig with discovered paths
+
+    Raises:
+        FileNotFoundError: If the build directory does not exist or a required
+            rocprof-sys-* executable is missing
+    """
+
+    # Explicit install directory check
+    if os.environ.get("ROCPROFSYS_INSTALL_DIR"):
+        return discover_install_config(output_dir=output_dir)
+
+    # When running from pyz package (extracted to /tmp), fall back to install config
+    # The pyz extracts to paths like /tmp/rocprofsys-tests-*/tests/rocprofsys/config.py
+    # Both sides are resolved and compared by path component, so a symlinked
+    # temp dir is recognised and a sibling such as "/tmpfoo" is not.
+    current_file = Path(__file__).resolve()
+    temp_root = Path(tempfile.gettempdir()).resolve()
+    if temp_root in current_file.parents:
+        return discover_install_config(output_dir=output_dir)
+
+    # All files should be in the build directory
+    if build_dir is None:
+        env_build = os.environ.get("ROCPROFSYS_BUILD_DIR")
+        if env_build:
+            build_dir = Path(env_build).resolve()
+        else:
+            # Six levels up from this file
+            # NOTE(review): presumably the build-tree root - confirm against
+            # where the tests are staged in the build directory.
+            build_dir = current_file.parent.parent.parent.parent.parent.parent
+    else:
+        # Path() makes a string passed by a caller work as well
+        build_dir = Path(build_dir)
+
+    if not build_dir.exists():
+        raise FileNotFoundError(
+            "Could not find build directory or installation. Set one of:\n"
+            "  - ROCPROFSYS_BUILD_DIR: Path to build directory\n"
+            "  - ROCPROFSYS_INSTALL_DIR: Path to installation prefix"
+        )
+
+    rocm_path = _find_rocm_path()
+    mpiexec = _find_mpiexec()
+
+    bin_dir = build_dir / "bin"
+    lib_dir = build_dir / "lib"
+
+    search_paths = [bin_dir]
+    rocprof_instrument = _find_executable("rocprof-sys-instrument", search_paths)
+    rocprof_sample = _find_executable("rocprof-sys-sample", search_paths)
+    rocprof_run = _find_executable("rocprof-sys-run", search_paths)
+    rocprof_causal = _find_executable("rocprof-sys-causal", search_paths)
+    rocprof_avail = _find_executable("rocprof-sys-avail", search_paths)
+
+    # If any of the executables are not found, raise an error
+    required_executables = {
+        "rocprof-sys-instrument": rocprof_instrument,
+        "rocprof-sys-sample": rocprof_sample,
+        "rocprof-sys-run": rocprof_run,
+        "rocprof-sys-causal": rocprof_causal,
+        "rocprof-sys-avail": rocprof_avail,
+    }
+
+    missing = [name for name, path in required_executables.items() if path is None]
+    if missing:
+        raise FileNotFoundError(
+            f"Required executables not found: {', '.join(missing)}. "
+            f"Searched in: {search_paths}"
+        )
+
+    share_path = build_dir / "share" / "rocprofiler-systems"
+
+    if output_dir is None:
+        output_dir = build_dir / "rocprof-sys-pytest-output"
+    else:
+        output_dir = Path(output_dir)
+
+    return RocprofsysConfig(
+        rocprofsys_build_dir=build_dir,
+        rocprofsys_instrument=rocprof_instrument,
+        rocprofsys_run=rocprof_run,
+        rocprofsys_sample=rocprof_sample,
+        rocprofsys_causal=rocprof_causal,
+        rocprofsys_avail=rocprof_avail,
+        rocm_path=rocm_path,
+        rocprofsys_lib_dir=lib_dir,
+        rocprofsys_bin_dir=bin_dir,
+        rocprofsys_examples_dir=build_dir,  # Example binaries are (almost always) in root of build directory
+        rocprofsys_tests_dir=share_path / "tests",
+        rocpd_validation_rules=share_path / "tests" / "rocpd-validation-rules",
+        test_output_dir=output_dir,
+        mpiexec=mpiexec,
+        rocm_version=_get_rocm_version(),
+        is_installed=False,
+    )
@@ -0,0 +1,364 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+from __future__ import annotations
+import re
+import shutil
+import subprocess
+import os
+from dataclasses import dataclass
+from functools import lru_cache
+from pathlib import Path
+from typing import Optional
+
+
+@dataclass
+class GPUInfo:
+    """Information about detected GPU(s)
+
+    Attributes:
+        available: Whether any GPU is available
+        architectures: List of GPU architectures
+        device_count: Number of GPUs detected
+        categories: Categories the GPU belongs to (instinct, radeon, apu)
+    """
+
+    available: bool
+    architectures: list[str]
+    device_count: int
+    categories: set[str]
+
+    @property
+    def rocm_events_for_test(self) -> str:
+        """Get appropriate ROCm events for testing based on architecture."""
+        mi300_or_later = False
+        for arch in self.architectures:
+            if re.match(r"gfx9[4-9][0-9A-Fa-f]", arch):
+                mi300_or_later = True
+                break
+        if mi300_or_later:
+            return "GRBM_COUNT,SQ_WAVES,SQ_INSTS_VALU,TA_TA_BUSY:device=0"
+        return "SQ_WAVES"
+
+    @property
+    def counter_names(self) -> list[str]:
+        """Get counter names for validation based on architecture"""
+        mi300_or_later = False
+        for arch in self.architectures:
+            if re.match(r"gfx9[4-9][0-9A-Fa-f]", arch):
+                mi300_or_later = True
+                break
+        if mi300_or_later:
+            return ["GRBM_COUNT", "SQ_WAVES", "SQ_INSTS_VALU", "TA_TA_BUSY"]
+        return ["SQ_WAVES"]
+
+    @property
+    def expected_counter_files(self) -> list[str]:
+        """Get expected counter output files based on architecture."""
+        return [f"rocprof-device-0-{name}.txt" for name in self.counter_names]
+
+
+def get_rocminfo(rocm_path: Optional[Path] = None) -> Optional[Path]:
+    """Get the path to the rocminfo executable.
+
+    Args:
+        rocm_path: Path to the ROCm installation directory
+
+    Returns:
+        Path to the rocminfo executable or None if not found
+    """
+    if rocm_path:
+        candidate = rocm_path / "bin" / "rocminfo"
+        if candidate.exists():
+            return Path(candidate).resolve()
+    rocminfo = shutil.which("rocminfo")
+    if rocminfo:
+        return Path(rocminfo).resolve()
+    return None
+
+
+@lru_cache(maxsize=1)
+def detect_gpu(rocm_path: Optional[Path] = None) -> GPUInfo:
+    """Detect available AMD GPUs and their capabilities.
+
+    Uses rocminfo to get the list of GPU architectures.
+    Regex avoids matching "gfxX-X-generic" which may appear.
+    """
+    categories: set[str] = set()
+    architectures: list[str] = []
+    device_count = 0
+
+    # Detect available GPUs
+    rocminfo = None
+    if rocm_path:
+        rocminfo = rocm_path / "bin" / "rocminfo"
+    if not rocminfo:
+        rocminfo = shutil.which("rocminfo")
+
+    if rocminfo:
+        try:
+            result = subprocess.run(
+                [str(rocminfo)],
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+            if result.returncode == 0:
+                # Only match gfx on "Name:"
+                name_gfx_pattern = re.compile(
+                    r"^\s*Name:\s+(gfx[0-9A-Fa-f][0-9A-Fa-f]+)", re.MULTILINE
+                )
+                all_matches = name_gfx_pattern.findall(result.stdout)
+                # gfx000 is the cpu, remove it
+                filtered = [arch for arch in all_matches if arch != "gfx000"]
+                device_count = len(filtered)
+                # Remove duplicates
+                architectures = list(set(filtered))
+        except (subprocess.TimeoutExpired, OSError):
+            pass
+
+    for arch in architectures:
+        categories.update(lookup_gpu_category(arch, rocm_path))
+
+    return GPUInfo(
+        available=device_count > 0,
+        architectures=sorted(architectures),
+        device_count=device_count,
+        categories=categories,
+    )
+
+
+def lookup_gpu_category(arch: str, rocm_path: Optional[Path] = None) -> list[str]:
+    """Lookup the GPU category for an architecture.
+
+    Args:
+        arch: Architecture string (e.g., 'gfx940')
+
+    Returns:
+        List of GPU categories the architecture belongs to (instinct, radeon, apu)
+    """
+    instinct_list = [
+        "gfx900",
+        "gfx906",  # MI50/MI60
+        "gfx908",
+        "gfx90a",
+        "gfx942",
+        "gfx950",
+    ]
+
+    # Also includes PRO GPUs
+    # Ignore Radeon VII (gfx906)
+    radeon_list = [
+        "gfx1010",
+        "gfx1011",
+        "gfx1012",
+        "gfx1030",
+        "gfx1031",
+        "gfx1032",
+        "gfx1100",
+        "gfx1101",
+        "gfx1102",
+        "gfx1200",
+        "gfx1201",
+        "gfx1202",
+    ]
+
+    apu_list = [
+        "gfx1035",
+        "gfx1036",
+        "gfx1103",
+        "gfx1151",
+        "gfx1152",
+        "gfx1153",
+    ]
+
+    categories: list[str] = []
+
+    if arch in instinct_list:
+        categories.append("instinct")
+        # Some instinct GPUs may also be an APU (ex: MI300A)
+        rocminfo = get_rocminfo(rocm_path)
+        if rocminfo:
+            try:
+                result = subprocess.run(
+                    [str(rocminfo)],
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                )
+                if result.returncode == 0 and "APU" in result.stdout:
+                    categories.append("apu")
+            except (subprocess.TimeoutExpired, OSError):
+                pass
+    if arch in radeon_list:
+        categories.append("radeon")
+    if arch in apu_list:
+        categories.append("apu")
+
+    if not categories:
+        # Unknown architecture, default to instinct
+        categories.append("instinct")
+
+    return categories
+
+
+@lru_cache(maxsize=1)
+def get_offload_extractor(rocm_path: Path) -> tuple[Optional[Path], Optional[bool]]:
+    """Get offload extractor path
+
+    An offload extractor is one of:
+        llvm-objdump (only if version >= 20) - Preferred
+        roc-obj-ls (deprecated)              - Fallback
+
+    Args:
+        rocm_path: Path to the ROCm installation directory
+
+    Returns:
+        Path to the offload extractor
+        Bool representing whether found llvm-objdump's version < 20 (None if llvm-objdump not found)
+    """
+
+    is_llvm_too_old = None
+    offload_extractor = None
+    # Check env var - accepts either path to binary or directory containing it
+    llvm_objdump_env = os.environ.get("ROCM_LLVM_OBJDUMP")
+    if llvm_objdump_env:
+        llvm_objdump_path = Path(llvm_objdump_env)
+        if llvm_objdump_path.is_file() and llvm_objdump_path.exists():
+            offload_extractor = llvm_objdump_path
+        elif llvm_objdump_path.is_dir():
+            candidate = llvm_objdump_path / "llvm-objdump"
+            if candidate.exists():
+                offload_extractor = candidate
+
+    # Fallback to ROCm path
+    if not offload_extractor and rocm_path:
+        llvm_objdump_candidates = [
+            rocm_path / "llvm" / "bin" / "llvm-objdump",
+            rocm_path / "bin" / "llvm-objdump",
+        ]
+        for candidate in llvm_objdump_candidates:
+            if candidate.exists():
+                offload_extractor = candidate
+                break
+
+    if offload_extractor:
+        # We have found llvm-objdump, check version
+        try:
+            version_result = subprocess.run(
+                [str(offload_extractor), "--version"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+            version_match = re.search(r"version\s+(\d+)", version_result.stdout)
+            if version_match:
+                major_version = int(version_match.group(1))
+                if major_version >= 20:
+                    is_llvm_too_old = False
+                    return (
+                        Path(offload_extractor).resolve(),
+                        is_llvm_too_old,
+                    )
+                else:
+                    is_llvm_too_old = True
+        except Exception:
+            pass
+
+    # Fallback to roc-obj-ls
+    offload_extractor = None
+    if rocm_path:
+        candidate = rocm_path / "bin" / "roc-obj-ls"
+        if candidate.exists():
+            offload_extractor = Path(candidate).resolve()
+            return offload_extractor, is_llvm_too_old
+    if not offload_extractor:
+        offload_extractor = shutil.which("roc-obj-ls")
+    if offload_extractor:
+        return offload_extractor, is_llvm_too_old
+    return None, is_llvm_too_old
+
+
+def get_target_gpu_arch(rocm_path: Path, target_path: Path) -> list[str]:
+    """Get the list of gpu architectures (gfx) the target was compiled for.
+
+    Args:
+        rocm_path: Path to the ROCm installation directory
+        target_path: Path to the binary to check
+
+    Returns:
+        List of GPU architectures the target was compiled for
+
+    Raises:
+        FileNotFoundError: If offload extractor is not found
+    """
+    import tempfile
+
+    target_archs: set[str] = set()
+
+    result = get_offload_extractor(rocm_path)
+    if not result:
+        raise FileNotFoundError(
+            f"Could not find offload extractor in {rocm_path} "
+            "or environment variable ROCM_LLVM_OBJDUMP"
+        )
+    tool_path, _ = result
+
+    if "llvm-objdump" in tool_path.name:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tmp_symlink = Path(tmpdir) / target_path.name
+            try:
+                tmp_symlink.symlink_to(target_path)
+            except OSError:
+                return list(target_archs)
+
+            extracted_files: list[Path] = []
+            try:
+                result = subprocess.run(
+                    [str(tool_path), "--offloading", str(tmp_symlink)],
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                )
+                if result.returncode == 0:
+                    for line in result.stdout.strip().split("\n"):
+                        # Match any gfxXXXX pattern in the line
+                        match = re.search(r"(gfx[0-9a-fA-F]+)", line)
+                        if match:
+                            target_archs.add(match.group(1))
+
+                        # Capture extracted bundle paths for cleanup
+                        bundle_match = re.search(
+                            r"Extracting offload bundle:\s*(.+)$", line
+                        )
+                        if bundle_match:
+                            extracted_files.append(Path(bundle_match.group(1)))
+            except (subprocess.TimeoutExpired, OSError):
+                pass
+
+            # Immediately clean up extracted files to free disk space
+            for extracted_file in extracted_files:
+                try:
+                    if extracted_file.exists():
+                        extracted_file.unlink()
+                except OSError:
+                    pass
+
+    elif "roc-obj-ls" in tool_path.name:
+        try:
+            result = subprocess.run(
+                [str(tool_path), str(target_path)],
+                capture_output=True,
+                text=True,
+                timeout=30,
+            )
+            if result.returncode == 0:
+                for line in result.stdout.strip().split("\n"):
+                    # Match any gfxXXXX pattern in the line
+                    match = re.search(r"(gfx[0-9a-fA-F]+)", line)
+                    if match:
+                        target_archs.add(match.group(1))
+        except (subprocess.TimeoutExpired, OSError):
+            pass
+
+    return list(target_archs)
@@ -0,0 +1,585 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Test runners for different rocprofiler-systems instrumentation modes.
+
+Provides classes for running tests with:
+- Baseline execution (no instrumentation)
+- Sampling instrumentation
+- Binary rewrite instrumentation
+- Runtime instrumentation
+- rocprof-sys-run wrapper
+"""
+
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+import os
+from pathlib import Path
+import shutil
+import subprocess
+from typing import Optional
+from .config import RocprofsysConfig
+
+
+def _safe_remove_file(filepath: Path) -> None:
+    """Safely remove a file, ignoring errors."""
+    try:
+        if filepath.is_file():
+            filepath.unlink()
+    except OSError:
+        pass
+
+
+def _safe_remove_directory(dirpath: Path) -> None:
+    """Safely remove a directory recursively, ignoring errors."""
+    try:
+        if dirpath.is_dir():
+            shutil.rmtree(dirpath)
+    except OSError:
+        pass
+
+
+def _decode_bytes(data: bytes | None, encoding: str = "utf-8") -> str:
+    """Decode bytes to string, returning empty string if None."""
+    if data is None:
+        return ""
+    return data.decode(encoding, errors="replace")
+
+
+@dataclass
+class TestResult:
+    """Result of a test execution
+
+    Attributes:
+        returncode: Process exit code
+        test_output: Standard output and error content
+        extra_output: Extra output set by the test itself
+                      (as of now, only used for timeout errors)
+        output_dir: Directory containing output files
+        command: The command that was executed
+        env: Environment variables used
+        duration: Execution time in seconds (if measured)
+        _instrumented_files: List of instrumented binary files created
+    """
+
+    returncode: int
+    test_output: str
+    output_dir: Path
+    command: list[str]
+    environment: dict[str, str]
+    extra_output: Optional[str] = None
+    duration: Optional[float] = None
+    _instrumented_files: list[Path] = field(default_factory=list)
+
+    @property
+    def success(self) -> bool:
+        """Check if test execution succeeded.
+
+        Returns True only if:
+        - Return code is 0
+        """
+        return self.returncode == 0
+
+    @property
+    def perfetto_file(self) -> Optional[Path]:
+        candidates = [
+            self.output_dir / "perfetto-trace.proto",
+            self.output_dir / "perfetto-trace-0.proto",
+        ]
+        for candidate in candidates:
+            if candidate.exists():
+                return candidate
+        protos = list(self.output_dir.glob("perfetto-trace*.proto"))
+        return protos[0] if protos else None
+
+    @property
+    def rocpd_file(self) -> Optional[Path]:
+        candidate = self.output_dir / "rocpd.db"
+        if candidate.exists():
+            return candidate
+        # Try globbing
+        dbs = list(self.output_dir.glob("*.db"))
+        return dbs[0] if dbs else None
+
+    @property
+    def timemory_files(self) -> list[Path]:
+        """List of timemory output files."""
+        return list(self.output_dir.glob("*.json")) + list(self.output_dir.glob("*.txt"))
+
+    def get_output_file(self, pattern: str) -> Optional[Path]:
+        """Get an output file matching the given pattern.
+
+        Args:
+            pattern: Glob pattern to match
+
+        Returns:
+            First matching file or None
+        """
+        matches = list(self.output_dir.glob(pattern))
+        return matches[0] if matches else None
+
+    def cleanup(self, keep_on_failure: bool = True) -> None:
+        """Clean up test output files.
+
+        Args:
+            keep_on_failure: If True, keep files when test failed for debugging
+        """
+        if os.environ.get("ROCPROFSYS_KEEP_TEST_OUTPUT", "1") == "1":
+            return
+
+        if keep_on_failure and not self.success:
+            return
+
+        # Clean up instrumented binaries
+        for inst_file in self._instrumented_files:
+            _safe_remove_file(inst_file)
+
+        # Clean up output directory
+        if self.output_dir.exists():
+            _safe_remove_directory(self.output_dir)
+
+    def cleanup_instrumented_binaries(self) -> None:
+        """Clean up only the instrumented binary files."""
+        if os.environ.get("ROCPROFSYS_KEEP_TEST_OUTPUT", "1") == "1":
+            return
+
+        for inst_file in self._instrumented_files:
+            _safe_remove_file(inst_file)
+
+        # Also clean any .inst files in output directory
+        if self.output_dir.exists():
+            for inst_file in self.output_dir.glob("*.inst"):
+                _safe_remove_file(inst_file)
+
+
+class BaseRunner(ABC):
+    """Abstract base class for test runners."""
+
+    def __init__(
+        self,
+        config: RocprofsysConfig,
+        target: str,
+        output_dir: Path,
+        run_args: Optional[list[str]] = None,
+        env: Optional[dict[str, str]] = None,
+        timeout: int = 300,
+        mpi_ranks: int = 0,
+        working_directory: Optional[Path] = None,
+    ):
+
+        self.config = config
+        self.target = target
+        self.target_exe = config.get_target_executable(target)
+        self.output_dir = Path(output_dir)
+        self.run_args = run_args or []
+        self.timeout = timeout
+        self.mpi_ranks = mpi_ranks
+        self.working_directory = working_directory or config.rocprofsys_build_dir
+        self.env = config.get_fundamental_environment()
+        self.env.update(config.get_base_environment())
+        self.env["ROCPROFSYS_OUTPUT_PATH"] = str(self.output_dir)
+        if env:
+            self.env.update(env)
+
+    @abstractmethod
+    def build_command(self) -> list[str]:
+        """Build the command to execute.
+
+        Returns:
+            List of command components
+        """
+        pass
+
+    def _wrap_with_mpi(self, command: list[str]) -> list[str]:
+        """Wrap command with MPI launcher if needed.
+
+        Args:
+            command: Base command
+
+        Returns:
+            Command wrapped with mpiexec if MPI is enabled
+        """
+        if self.mpi_ranks > 0 and self.config.mpiexec:
+            mpi_cmd = [
+                str(self.config.mpiexec),
+                "-n",
+                str(self.mpi_ranks),
+            ]
+
+            try:
+                result = subprocess.run(
+                    [str(self.config.mpiexec), "--oversubscribe", "-n", "1", "true"],
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    timeout=5,
+                )
+                if result.returncode == 0:
+                    mpi_cmd.insert(1, "--oversubscribe")
+            except (subprocess.TimeoutExpired, OSError):
+                pass
+
+            return mpi_cmd + command
+
+        return command
+
+    def run(self) -> TestResult:
+        """Execute the test.
+
+        Returns:
+            TestResult with execution results
+        """
+        import time
+
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        command = self.build_command()
+        command = self._wrap_with_mpi(command)
+
+        start_time = time.time()
+
+        try:
+            result = subprocess.run(
+                command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                timeout=self.timeout,
+                env=self.env,
+                cwd=self.working_directory,
+            )
+
+            duration = time.time() - start_time
+            test_result = TestResult(
+                returncode=result.returncode,
+                test_output=result.stdout,
+                output_dir=self.output_dir,
+                command=command,
+                environment=self.env,
+                duration=duration,
+            )
+
+        except subprocess.TimeoutExpired as e:
+            duration = time.time() - start_time
+            stdout = _decode_bytes(e.stdout)
+            stderr = _decode_bytes(e.stderr)
+
+            test_result = TestResult(
+                returncode=-1,
+                test_output=stdout,
+                extra_output=f"Timeout after {self.timeout}s\n{stderr}",
+                output_dir=self.output_dir,
+                command=command,
+                environment=self.env,
+                duration=duration,
+            )
+
+        return test_result
+
+
+class BaselineRunner(BaseRunner):
+    """Run target without any instrumentation.
+
+    Can also be used to run arbitrary commands by providing the `command` parameter.
+     - command + run_args are executed as a single command
+    If a rocprof-sys binary is provided, uses "base_binary_environment" instead of "base_environment".
+
+    Args:
+        config: rocprofiler-systems configuration
+        target: Name of target executable (used if command is None)
+        output_dir: Directory for output files
+        command: Optional full command to run instead of target executable
+        **kwargs: Additional arguments passed to BaseRunner
+    """
+
+    # rocprof-sys binaries that should use get_base_binary_environment()
+    ROCPROFSYS_BINARIES = {
+        "rocprof-sys-instrument",
+        "rocprof-sys-sample",
+        "rocprof-sys-run",
+        "rocprof-sys-avail",
+    }
+
+    def __init__(
+        self,
+        config: RocprofsysConfig,
+        target: str,
+        output_dir: Path,
+        command: Optional[list[str]] = None,
+        **kwargs,
+    ):
+        super().__init__(config, target, output_dir, **kwargs)
+        self.command = command
+
+        # If target is a rocprof-sys binary, use binary environment instead
+        if target in self.ROCPROFSYS_BINARIES:
+            self.env = config.get_fundamental_environment()
+            self.env.update(config.get_base_binary_environment())
+            self.env["ROCPROFSYS_OUTPUT_PATH"] = str(self.output_dir)
+            # Re-apply any custom env passed via kwargs
+            if "env" in kwargs and kwargs["env"]:
+                self.env.update(kwargs["env"])
+
+    def build_command(self) -> list[str]:
+        if self.command:
+            return self.command + self.run_args
+        return [str(self.target_exe)] + self.run_args
+
+
+class SamplingRunner(BaseRunner):
+    """Run target with sampling instrumentation."""
+
+    def __init__(
+        self,
+        config: RocprofsysConfig,
+        target: str,
+        output_dir: Path,
+        sample_args: Optional[list[str]] = None,
+        **kwargs,
+    ):
+        """Initialize sampling runner.
+
+        Args:
+            config: rocprofiler-systems configuration
+            target: Name of target executable
+            output_dir: Directory for output files
+            sample_args: Arguments for rocprof-sys-sample
+            **kwargs: Additional arguments passed to BaseRunner
+        """
+        super().__init__(config, target, output_dir, **kwargs)
+        self.sample_args = sample_args or []
+
+    def build_command(self) -> list[str]:
+        return (
+            [str(self.config.rocprofsys_sample)]
+            + self.sample_args
+            + ["--", str(self.target_exe)]
+            + self.run_args
+        )
+
+
+class BinaryRewriteRunner(BaseRunner):
+    """Run binary rewrite instrumentation (two-phase: rewrite then run)."""
+
+    def __init__(
+        self,
+        config: RocprofsysConfig,
+        target: str,
+        output_dir: Path,
+        rewrite_args: Optional[list[str]] = None,
+        cleanup_on_success: bool = False,
+        **kwargs,
+    ):
+        """Initialize binary rewrite runner.
+
+        Args:
+            config: rocprofiler-systems configuration
+            target: Name of target executable
+            output_dir: Directory for output files
+            rewrite_args: Arguments for rocprof-sys-instrument
+            cleanup_on_success: Whether to clean up instrumented binary immediately
+                after successful run. Default is False - let the test_output_dir
+                fixture handle cleanup after validation completes.
+            **kwargs: Additional arguments passed to BaseRunner
+        """
+        super().__init__(config, target, output_dir, **kwargs)
+        self.rewrite_args = rewrite_args or []
+        self.instrumented_exe = output_dir / f"{target}.inst"
+        self.cleanup_on_success = cleanup_on_success
+        self._instrumented_files: list[Path] = []
+
+    def rewrite(self) -> TestResult:
+        """Perform binary rewrite phase.
+
+        Returns:
+            TestResult from rewrite operation
+        """
+        import time
+
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        command = (
+            [str(self.config.rocprofsys_instrument)]
+            + ["-o", str(self.instrumented_exe)]
+            + self.rewrite_args
+            + ["--print-instrumented", "functions"]
+            + ["--", str(self.target_exe)]
+        )
+
+        start_time = time.time()
+
+        try:
+            result = subprocess.run(
+                command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                timeout=self.timeout,
+                env=self.env,
+                cwd=self.config.rocprofsys_build_dir,
+            )
+
+            duration = time.time() - start_time
+            test_result = TestResult(
+                returncode=result.returncode,
+                test_output=result.stdout,
+                output_dir=self.output_dir,
+                command=command,
+                environment=self.env,
+                duration=duration,
+                _instrumented_files=self._instrumented_files.copy(),
+            )
+
+        except subprocess.TimeoutExpired as e:
+            duration = time.time() - start_time
+            stdout = _decode_bytes(e.stdout)
+            stderr = _decode_bytes(e.stderr)
+
+            test_result = TestResult(
+                returncode=-1,
+                test_output=stdout,
+                extra_output=f"Timeout after {self.timeout}s\n{stderr}",
+                output_dir=self.output_dir,
+                command=command,
+                environment=self.env,
+                duration=duration,
+                _instrumented_files=self._instrumented_files.copy(),
+            )
+
+        # Track instrumented files for cleanup
+        if self.instrumented_exe.exists():
+            self._instrumented_files.append(self.instrumented_exe)
+
+        return test_result
+
+    def build_command(self) -> list[str]:
+        """Build command to run the instrumented binary."""
+        return [
+            str(self.config.rocprofsys_run),
+            "--",
+            str(self.instrumented_exe),
+        ] + self.run_args
+
+    def run(self) -> TestResult:
+        """Execute full rewrite + run sequence.
+
+        Returns:
+            TestResult from full rewrite + run sequence
+
+        Note:
+            By default, cleanup is handled by the test_output_dir fixture
+            AFTER the test completes (including validation). Set cleanup_on_success=True
+            only if you want immediate cleanup of .inst files (validation files are
+            preserved regardless).
+        """
+        # First, perform rewrite
+        rewrite_result = self.rewrite()
+        if not rewrite_result.success:
+            return rewrite_result
+
+        # Then run the instrumented binary
+        run_result = super().run()
+
+        # Add instrumented files to result for cleanup (used by fixtures)
+        run_result._instrumented_files = self._instrumented_files.copy()
+
+        # Optional immediate cleanup of .inst files only (NOT validation files)
+        # Default is False - let test_output_dir fixture handle all cleanup
+        # after validation completes
+        if self.cleanup_on_success and run_result.success:
+            run_result.cleanup_instrumented_binaries()
+
+        # Combine rewrite and run output
+        run_result.test_output = (
+            f"=== REWRITE PHASE ===\n{rewrite_result.test_output}\n"
+            f"=== RUN PHASE ===\n{run_result.test_output}"
+        )
+        run_result.duration = rewrite_result.duration + run_result.duration
+        extra_parts = []
+        if rewrite_result.extra_output:
+            extra_parts.append(f"=== REWRITE PHASE ===\n{rewrite_result.extra_output}")
+        if run_result.extra_output:
+            extra_parts.append(f"=== RUN PHASE ===\n{run_result.extra_output}")
+        if extra_parts:
+            run_result.extra_output = "\n".join(extra_parts)
+
+        return run_result
+
+    def cleanup(self) -> None:
+        """Clean up instrumented binary files."""
+        if os.environ.get("ROCPROFSYS_KEEP_TEST_OUTPUT", "1") == "1":
+            return
+
+        for inst_file in self._instrumented_files:
+            _safe_remove_file(inst_file)
+
+        # Also clean any .inst files in output directory
+        if self.output_dir.exists():
+            for inst_file in self.output_dir.glob("*.inst"):
+                _safe_remove_file(inst_file)
+
+
+class RuntimeInstrumentRunner(BaseRunner):
+    """Run target with runtime instrumentation."""
+
+    def __init__(
+        self,
+        config: RocprofsysConfig,
+        target: str,
+        output_dir: Path,
+        instrument_args: Optional[list[str]] = None,
+        **kwargs,
+    ):
+        """Initialize runtime instrument runner.
+
+        Args:
+            config: rocprofiler-systems configuration
+            target: Name of target executable
+            output_dir: Directory for output files
+            instrument_args: Arguments for rocprof-sys-instrument
+            **kwargs: Additional arguments passed to BaseRunner
+        """
+        super().__init__(config, target, output_dir, **kwargs)
+        self.instrument_args = instrument_args or []
+
+    def build_command(self) -> list[str]:
+        return (
+            [str(self.config.rocprofsys_instrument)]
+            + self.instrument_args
+            + ["--print-instrumented", "functions"]
+            + ["--", str(self.target_exe)]
+            + self.run_args
+        )
+
+
+class SysRunRunner(BaseRunner):
+    """Run target with rocprof-sys-run wrapper."""
+
+    def __init__(
+        self,
+        config: RocprofsysConfig,
+        target: str,
+        output_dir: Path,
+        sysrun_args: Optional[list[str]] = None,
+        **kwargs,
+    ):
+        """Initialize sys-run runner.
+
+        Args:
+            config: rocprofiler-systems configuration
+            target: Name of target executable
+            output_dir: Directory for output files
+            sysrun_args: Arguments for rocprof-sys-run (before --)
+            **kwargs: Additional arguments passed to BaseRunner
+        """
+        super().__init__(config, target, output_dir, **kwargs)
+        self.sysrun_args = sysrun_args or []
+
+    def build_command(self) -> list[str]:
+        return (
+            [str(self.config.rocprofsys_run)]
+            + self.sysrun_args
+            + ["--", str(self.target_exe)]
+            + self.run_args
+        )
@@ -0,0 +1,417 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Output validators for rocprofiler-systems test results.
+
+This module wraps the existing validation scripts from the tests/ directory:
+- validate-perfetto-proto.py
+- validate-rocpd.py
+- validate-timemory-json.py
+- validate-causal-json.py
+
+We also provide the following validators implemented directly in this module:
+- validate_regex
+- validate_file_exists
+"""
+
+from __future__ import annotations
+import os
+import re
+import shlex
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Optional
+
+
+@dataclass
+class ValidationResult:
+    """Outcome of a single validation step.
+
+    Only ``is_valid`` and ``message`` are required; the remaining fields
+    default to empty values and are filled in by validators that run an
+    external script.
+
+    Attributes:
+        is_valid: Whether the validation passed
+        message: Description of result or error
+        details: Additional details (e.g., query results)
+        stdout: Standard output from validation script
+        stderr: Standard error from validation script
+        command: The command that was executed
+    """
+
+    is_valid: bool
+    message: str
+    details: Optional[dict[str, Any]] = None
+    stdout: str = ""
+    stderr: str = ""
+    command: str = ""
+
+
+# Regex patterns whose presence in a test's output marks the run as failed.
+# validate_regex adds these to its fail patterns when use_abort_fail_regex
+# is true (the default).
+ROCPROFSYS_ABORT_FAIL_REGEX = [
+    r"### ERROR ###",
+    r"unknown-hash=",
+    r"address of faulting memory reference",
+    r"exiting with non-zero exit code",
+    r"terminate called after throwing an instance",
+    r"calling abort\.\. in ",
+    r"Exit code: [1-9]",
+]
+
+from rocprofsys.runners import TestResult
+
+
+def validate_regex(
+    test_result: TestResult,
+    pass_regex: Optional[list[str]] = None,
+    fail_regex: Optional[list[str]] = None,
+    use_abort_fail_regex: bool = True,
+) -> ValidationResult:
+    """Validate the regex patterns in the test result.
+    Does not check for result return code.
+
+    Every pattern is searched for independently over the complete test
+    output. Patterns are deliberately NOT merged into a single alternation:
+    an alternation scanned with ``finditer`` yields non-overlapping matches,
+    so a wide pass pattern (for example ``A[\\s\\S]*B``) would consume the
+    text between A and B and hide any fail pattern, or other pass pattern,
+    located inside that span.
+
+    Args:
+        test_result: TestResult object (after test execution); only its
+            ``test_output`` text is inspected
+        pass_regex: Optional list of regex patterns that must be found for success
+        fail_regex: Optional list of regex patterns that must NOT be found
+        use_abort_fail_regex: Whether to validate against ROCPROFSYS_ABORT_FAIL_REGEX (default: True)
+
+    Returns:
+        ValidationResult with is_valid=True if all patterns pass, False otherwise.
+        On failure the message names the first offending pattern in list
+        order (fail patterns are checked before pass patterns).
+
+    Raises:
+        re.error: If any supplied pattern is not a valid regular expression
+    """
+    # Do not check for result return code
+
+    # Caller-supplied fail patterns first, then the shared abort patterns
+    fail_patterns: list[str] = list(fail_regex or [])
+    if use_abort_fail_regex:
+        fail_patterns.extend(ROCPROFSYS_ABORT_FAIL_REGEX)
+
+    pass_patterns: list[str] = list(pass_regex or [])
+
+    if not fail_patterns and not pass_patterns:
+        return ValidationResult(is_valid=True, message="No patterns to validate")
+
+    output = test_result.test_output
+
+    # Any fail pattern anywhere in the output is an immediate failure
+    for pattern in fail_patterns:
+        if re.search(pattern, output):
+            return ValidationResult(
+                is_valid=False,
+                message=f"Fail pattern matched: {pattern}",
+            )
+
+    # Every pass pattern must be present; report the first missing one
+    for pattern in pass_patterns:
+        if not re.search(pattern, output):
+            return ValidationResult(
+                is_valid=False,
+                message=f"Pass pattern not found: {pattern}",
+            )
+
+    return ValidationResult(is_valid=True, message="All patterns validated successfully")
+
+
+def validate_file_exists(path: Path, description: str = "File") -> ValidationResult:
+    """Check that *path* exists and has a non-zero size.
+
+    Args:
+        path: Path to check
+        description: Label used at the start of the result message
+
+    Returns:
+        ValidationResult that is invalid when the path is missing or its
+        size is zero, and valid otherwise
+    """
+    if not path.exists():
+        outcome = ValidationResult(False, f"{description} not found: {path}")
+    elif path.stat().st_size == 0:
+        outcome = ValidationResult(False, f"{description} is empty: {path}")
+    else:
+        outcome = ValidationResult(True, f"{description} exists: {path}")
+    return outcome
+
+
+def _run_validation_script(
+    script_name: str,
+    args: list[str],
+    tests_dir: Path,
+    timeout: int = 60,
+) -> ValidationResult:
+    """Execute one of the validation scripts with the current Python interpreter.
+
+    Args:
+        script_name: Name of the script (e.g., 'validate-perfetto-proto.py')
+        args: Arguments to pass to the script
+        tests_dir: Path to directory containing validation scripts
+        timeout: Timeout in seconds
+
+    Returns:
+        ValidationResult that is valid only when the script exits with code 0.
+        On success the message is the script's stripped stdout; on failure it
+        is the stripped stderr, else the stripped stdout, else the exit code.
+        A missing script, a timeout, or any other exception is reported as an
+        invalid result rather than raised.
+    """
+    script = tests_dir / script_name
+    if not script.exists():
+        return ValidationResult(False, f"Validation script not found: {script}")
+
+    command = [sys.executable, str(script)] + args
+    # Shell-quoted rendering of the command, stored on the result for reporting
+    rendered = " ".join(shlex.quote(part) for part in command)
+
+    try:
+        proc = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+        )
+        passed = proc.returncode == 0
+        summary = (
+            proc.stdout.strip()
+            if passed
+            else (
+                proc.stderr.strip()
+                or proc.stdout.strip()
+                or f"Exit code: {proc.returncode}"
+            )
+        )
+        return ValidationResult(
+            is_valid=passed,
+            message=summary,
+            stdout=proc.stdout,
+            stderr=proc.stderr,
+            command=rendered,
+        )
+    except subprocess.TimeoutExpired:
+        return ValidationResult(
+            False, f"Validation timed out after {timeout}s", command=rendered
+        )
+    except Exception as exc:
+        return ValidationResult(False, f"Validation error: {exc}", command=rendered)
+
+
+# ============================================================================
+# Perfetto Validation - wraps validate-perfetto-proto.py
+# ============================================================================
+
+
+def validate_perfetto_trace(
+    trace_path: Path,
+    tests_dir: Path,
+    categories: Optional[list[str]] = None,
+    labels: Optional[list[str]] = None,
+    counts: Optional[list[int]] = None,
+    depths: Optional[list[int]] = None,
+    label_substrings: Optional[list[str]] = None,
+    counter_names: Optional[list[str]] = None,
+    key_names: Optional[list[str]] = None,
+    key_counts: Optional[list[int]] = None,
+    trace_processor_path: Optional[Path] = None,
+    print_output: bool = False,
+    timeout: int = 120,
+) -> ValidationResult:
+    """Check a Perfetto trace by delegating to validate-perfetto-proto.py.
+
+    Args:
+        trace_path: Path to perfetto-trace.proto file
+        tests_dir: Path to directory containing validation scripts
+        categories: List of categories to filter by (-m flag)
+        labels: Expected labels (-l flag)
+        counts: Expected counts (-c flag)
+        depths: Expected depths (-d flag)
+        label_substrings: Expected label substrings (-s flag); only used when
+            ``labels`` is empty or None
+        counter_names: Counter names to validate (--counter-names flag)
+        key_names: Debug key names to check (--key-names flag)
+        key_counts: Expected counts for debug keys (--key-counts flag)
+        trace_processor_path: Path to trace_processor_shell (-t flag); replaced
+            by the ROCPROFSYS_TRACE_PROC_SHELL environment variable when that
+            variable is set to a non-empty value
+        print_output: Whether to print trace data (-p flag)
+        timeout: Validation timeout in seconds
+
+    Returns:
+        ValidationResult with validation status; invalid without running the
+        script when the trace file does not exist
+    """
+    if not trace_path.exists():
+        return ValidationResult(False, f"Trace file not found: {trace_path}")
+
+    # Allow override of trace_processor_path to allow perfetto validation using older GLIBC versions
+    shell_override = os.environ.get("ROCPROFSYS_TRACE_PROC_SHELL")
+    if shell_override:
+        trace_processor_path = Path(shell_override)
+
+    # (flag, values) pairs in the order they appear on the command line;
+    # a pair is emitted only when its value list is non-empty.
+    option_groups = [
+        ("-m", categories),
+        ("-l", labels) if labels else ("-s", label_substrings),
+        ("-c", [str(c) for c in counts or []]),
+        ("-d", [str(d) for d in depths or []]),
+        ("--counter-names", counter_names),
+        ("--key-names", key_names),
+        ("--key-counts", [str(k) for k in key_counts or []]),
+    ]
+
+    script_args = ["-i", str(trace_path)]
+    for flag, values in option_groups:
+        if values:
+            script_args.extend([flag] + values)
+
+    if trace_processor_path:
+        script_args.extend(["-t", str(trace_processor_path)])
+
+    if print_output:
+        script_args.append("-p")
+
+    return _run_validation_script(
+        "validate-perfetto-proto.py", script_args, tests_dir, timeout
+    )
+
+
+# ============================================================================
+# ROCpd Database Validation - wraps validate-rocpd.py
+# ============================================================================
+
+
+def validate_rocpd_database(
+    db_path: Path,
+    tests_dir: Path,
+    rules_files: Optional[list[Path]] = None,
+    timeout: int = 60,
+) -> ValidationResult:
+    """Check a ROCpd database by delegating to validate-rocpd.py.
+
+    Args:
+        db_path: Path to rocpd.db file
+        tests_dir: Path to directory containing validation scripts
+        rules_files: JSON rules files to pass with -r; entries that do not
+            exist on disk are left out, and -r is omitted when none remain
+        timeout: Validation timeout in seconds
+
+    Returns:
+        ValidationResult with validation status; invalid without running the
+        script when the database file does not exist
+    """
+    if not db_path.exists():
+        return ValidationResult(False, f"Database not found: {db_path}")
+
+    # Keep only the rules files that are actually present
+    present_rules = [str(rule) for rule in (rules_files or []) if rule.exists()]
+
+    script_args = ["-db", str(db_path)]
+    if present_rules:
+        script_args.extend(["-r"] + present_rules)
+
+    return _run_validation_script("validate-rocpd.py", script_args, tests_dir, timeout)
+
+
+# ============================================================================
+# Timemory JSON Validation - wraps validate-timemory-json.py
+# ============================================================================
+
+
+def validate_timemory_json(
+    json_path: Path,
+    tests_dir: Path,
+    metric: str,
+    labels: Optional[list[str]] = None,
+    counts: Optional[list[int]] = None,
+    depths: Optional[list[int]] = None,
+    print_output: bool = False,
+    timeout: int = 60,
+) -> ValidationResult:
+    """Check a timemory JSON output file by delegating to validate-timemory-json.py.
+
+    Args:
+        json_path: Path to JSON file
+        tests_dir: Path to directory containing validation scripts
+        metric: Metric name to validate (-m flag)
+        labels: Expected labels (-l flag)
+        counts: Expected counts (-c flag)
+        depths: Expected depths (-d flag)
+        print_output: Whether to print data (-p flag)
+        timeout: Validation timeout in seconds
+
+    Returns:
+        ValidationResult with validation status; invalid without running the
+        script when the JSON file does not exist
+    """
+    if not json_path.exists():
+        return ValidationResult(False, f"JSON file not found: {json_path}")
+
+    # (flag, values) pairs in command-line order; empty groups are skipped
+    option_groups = [
+        ("-l", labels),
+        ("-c", [str(c) for c in counts or []]),
+        ("-d", [str(d) for d in depths or []]),
+    ]
+
+    script_args = ["-i", str(json_path), "-m", metric]
+    for flag, values in option_groups:
+        if values:
+            script_args.extend([flag] + values)
+
+    if print_output:
+        script_args.append("-p")
+
+    return _run_validation_script(
+        "validate-timemory-json.py", script_args, tests_dir, timeout
+    )
+
+
+# ============================================================================
+# Causal JSON Validation - wraps validate-causal-json.py
+# ============================================================================
+
+
+def validate_causal_json(
+    json_path: Path,
+    tests_dir: Path,
+    ci_mode: bool = False,
+    additional_args: Optional[list[str]] = None,
+    timeout: int = 60,
+) -> ValidationResult:
+    """Check a causal profiling JSON file by delegating to validate-causal-json.py.
+
+    Args:
+        json_path: Path to causal JSON file
+        tests_dir: Path to directory containing validation scripts
+        ci_mode: Whether running in CI mode (--ci flag)
+        additional_args: Extra arguments appended after the path and --ci
+        timeout: Validation timeout in seconds
+
+    Returns:
+        ValidationResult with validation status; invalid without running the
+        script when the JSON file does not exist
+    """
+    if not json_path.exists():
+        return ValidationResult(False, f"JSON file not found: {json_path}")
+
+    # Positional path first, then the optional --ci switch, then extras
+    script_args = [str(json_path)] + (["--ci"] if ci_mode else [])
+    if additional_args:
+        script_args.extend(additional_args)
+
+    return _run_validation_script(
+        "validate-causal-json.py", script_args, tests_dir, timeout
+    )
@@ -0,0 +1,757 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests rocprof-sys binaries
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+import os
+
+pytestmark = [pytest.mark.rocprof_binary]
+
+
+# ============================================================================
+# Helper functions
+# ============================================================================
+
+
+def get_ls_command() -> tuple[str, list[str]]:
+    """Pick the command used to invoke ``ls`` (handles RedHat coreutils wrapper).
+
+    When ``/usr/bin/coreutils`` exists, ``ls`` is reached through the
+    multi-call ``coreutils`` binary; otherwise plain ``ls`` is used.
+
+    Returns:
+        Tuple of (binary_name, args_list)
+    """
+    has_coreutils_wrapper = os.path.exists("/usr/bin/coreutils")
+    return (
+        ("coreutils", ["--coreutils-prog=ls"]) if has_coreutils_wrapper else ("ls", [])
+    )
+
+
+# ============================================================================
+# rocprof-sys-instrument tests
+# ============================================================================
+
+
+class TestInstrumentBinary:
+    """Tests for rocprof-sys-instrument binary."""
+
+    # Executable name handed to run_test as the target for every test here
+    target = "rocprof-sys-instrument"
+
+    def test_help(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Check that --help prints the usage banner and every option-group heading in order."""
+        # Adjacent string literals concatenate into ONE pattern, so the
+        # headings must appear in exactly this order.
+        pass_regex = [
+            r"\[rocprof-sys-instrument\] Usage:[\s\S]*"
+            r"\[DEBUG OPTIONS\][\s\S]*"
+            r"\[MODE OPTIONS\][\s\S]*"
+            r"\[LIBRARY OPTIONS\][\s\S]*"
+            r"\[SYMBOL SELECTION OPTIONS\][\s\S]*"
+            r"\[RUNTIME OPTIONS\][\s\S]*"
+            r"\[GRANULARITY OPTIONS\][\s\S]*"
+            r"\[DYNINST OPTIONS\]"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--help"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_simulate_ls(
+        self,
+        run_test,
+        assert_regex,
+        assert_file_exists,
+    ):
+        """Run --simulate against ls and expect json/txt/xml reports for each report category."""
+        ls_name, ls_args = get_ls_command()
+
+        test_args = [
+            "--simulate",
+            "--print-format",
+            "json",
+            "txt",
+            "xml",
+            "-v",
+            "2",
+            "--all-functions",
+            "--",
+            ls_name,
+            *ls_args,
+        ]
+
+        # Four report categories, each in the three requested formats
+        expected_files = [
+            "available.json",
+            "available.txt",
+            "available.xml",
+            "excluded.json",
+            "excluded.txt",
+            "excluded.xml",
+            "instrumented.json",
+            "instrumented.txt",
+            "instrumented.xml",
+            "overlapping.json",
+            "overlapping.txt",
+            "overlapping.xml",
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=test_args,
+            timeout=240,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result)
+        # Reports are looked up in the "instrumentation" subdirectory of the run's output dir
+        expected_files_paths = [
+            result.output_dir / "instrumentation" / f for f in expected_files
+        ]
+        assert_file_exists(expected_files_paths)
+
+    def test_simulate_lib(
+        self,
+        rocprof_config,
+        run_test,
+        assert_regex,
+    ):
+        """Pass a shared library by full path and expect the fallback to simulated binary rewrite."""
+        user_lib = rocprof_config.rocprofsys_lib_dir / "librocprof-sys-user.so"
+        # NOTE(review): a missing library FAILS this test, whereas
+        # test_simulate_lib_basename SKIPS in the same situation — confirm
+        # which behaviour is intended.
+        if not user_lib.exists():
+            pytest.fail("librocprof-sys-user.so not found")
+
+        pass_regex = [
+            r"\[rocprof-sys\]\[exe\] Runtime instrumentation is not possible![\s\S]*"
+            r"\[rocprof-sys\]\[exe\] Switching to binary rewrite mode and assuming '--simulate --all-functions'"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--print-available", "functions", "-v", "2", "--", str(user_lib)],
+            timeout=120,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_simulate_lib_basename(
+        self,
+        rocprof_config,
+        test_output_dir,
+        run_test,
+        assert_regex,
+    ):
+        """Test instrument with library basename.
+
+        This MUST be run from a tmp directory, NOT from the actual lib directory.
+        Running from the lib directory causes Dyninst to modify the library in-place,
+        contaminating it with instrumentation markers. This breaks all subsequent
+        binary rewrite tests with "unable to reinstrument previously instrumented
+        binary" errors.
+        """
+        lib_basename = "librocprof-sys-user.so"
+        user_lib = rocprof_config.rocprofsys_lib_dir / lib_basename
+        if not user_lib.exists():
+            pytest.skip(f"{lib_basename} not built")
+
+        # Scratch working directory (see docstring for why this is required)
+        tmp_dir = test_output_dir / "tmp"
+        tmp_dir.mkdir(parents=True, exist_ok=True)
+
+        # Explicit -o destination inside the test's own output directory
+        output_lib = test_output_dir / lib_basename
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=[
+                "--print-available",
+                "functions",
+                "-v",
+                "2",
+                "-o",
+                str(output_lib),
+                "--",
+                lib_basename,
+            ],
+            timeout=120,
+            working_directory=tmp_dir,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result)
+
+    def test_write_log(
+        self,
+        run_test,
+        assert_regex,
+        assert_file_exists,
+    ):
+        """Test instrument writing to log file."""
+        ls_name, ls_args = get_ls_command()
+
+        # The tool is expected to announce that it opens the log under instrumentation/
+        pass_regex = [r"Opening .*/instrumentation/user\.log"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=[
+                "--print-instrumented",
+                "functions",
+                "-v",
+                "1",
+                "--log-file",
+                "user.log",
+                "--",
+                ls_name,
+                *ls_args,
+            ],
+            timeout=120,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+        assert_file_exists(result.output_dir / "instrumentation" / "user.log")
+
+
+# ============================================================================
+# rocprof-sys-avail tests
+# ============================================================================
+
+
+class TestAvailBinary:
+    """Tests for rocprof-sys-avail binary."""
+
+    # Executable name handed to run_test as the target for every test here
+    target = "rocprof-sys-avail"
+
+    def test_help(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Check that --help prints the usage banner and every option-group heading in order."""
+        # Adjacent string literals concatenate into ONE pattern, so the
+        # headings must appear in exactly this order.
+        pass_regex = [
+            r"\[rocprof-sys-avail\] Usage:[\s\S]*"
+            r"\[DEBUG OPTIONS\][\s\S]*"
+            r"\[INFO OPTIONS\][\s\S]*"
+            r"\[FILTER OPTIONS\][\s\S]*"
+            r"\[COLUMN OPTIONS\][\s\S]*"
+            r"\[DISPLAY OPTIONS\][\s\S]*"
+            r"\[OUTPUT OPTIONS\][\s\S]*"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--help"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_all(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Run with --all and check the output only against the default fail patterns."""
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--all"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result)
+
+    def test_all_expand_keys(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Run with --all --expand-keys and reject output that still contains a %x% placeholder."""
+        # NOTE(review): this pattern only matches a SINGLE letter/underscore
+        # between the percent signs; confirm whether r"%[a-zA-Z_]+%" was
+        # intended so that multi-character keys are caught as well.
+        fail_regex = [r"%[a-zA-Z_]%"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--all", "--expand-keys"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, fail_regex=fail_regex)
+
+    def test_all_only_available_alphabetical(
+        self,
+        run_test,
+        test_output_dir,
+        assert_regex,
+        assert_file_exists,
+    ):
+        """Run with --all --available --alphabetical --debug and check that the --output file is written."""
+        log_file = (
+            test_output_dir / "rocprof-sys-avail-all-only-available-alphabetical.log"
+        )
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=[
+                "--all",
+                "--available",
+                "--alphabetical",
+                "--debug",
+                "--output",
+                str(log_file),
+            ],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result)
+        assert_file_exists(log_file)
+
+    def test_all_csv(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Run with --csv and a '#' separator and expect the three CSV header rows in order."""
+        pass_regex = [
+            r"COMPONENT#AVAILABLE#VALUE_TYPE#STRING_IDS#FILENAME#DESCRIPTION#CATEGORY#[\s\S]*"
+            r"ENVIRONMENT VARIABLE#VALUE#DATA TYPE#DESCRIPTION#CATEGORIES#[\s\S]*"
+            r"HARDWARE COUNTER#DEVICE#AVAILABLE#DESCRIPTION#"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--all", "--csv", "--csv-separator", "#"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_filter_wall_clock_available(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Filter on wall_clock and expect a table with a COMPONENT header and a wall_clock row."""
+        pass_regex = [
+            r"\|[-]+\|[\s\S]*"
+            r"\|[ ]+COMPONENT[ ]+\|[\s\S]*"
+            r"\|[-]+\|[\s\S]*"
+            r"\| (wall_clock)[ ]+\|[\s\S]*"
+            r"\|[-]+\|"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["-r", "wall_clock", "-C", "--available"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_category_filter_rocprofiler_systems(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """With --categories settings::rocprofsys, expect rocprofsys settings and none of the listed timemory ones."""
+        pass_regex = [r"ROCPROFSYS_(SETTINGS_DESC|OUTPUT_FILE|OUTPUT_PREFIX)"]
+        fail_regex = [
+            r"ROCPROFSYS_(ADD_SECONDARY|SCIENTIFIC|PRECISION|MEMORY_PRECISION|TIMING_PRECISION)",
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--categories", "settings::rocprofsys", "--brief"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex, fail_regex=fail_regex)
+
+    def test_category_filter_timemory(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """With --categories settings::timemory, expect timemory settings and none of the listed rocprofsys ones."""
+        pass_regex = [
+            r"ROCPROFSYS_(ADD_SECONDARY|SCIENTIFIC|PRECISION|MEMORY_PRECISION|TIMING_PRECISION)"
+        ]
+        fail_regex = [r"ROCPROFSYS_(SETTINGS_DESC|OUTPUT_FILE)"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--categories", "settings::timemory", "--brief", "--advanced"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex, fail_regex=fail_regex)
+
+    def test_regex_negation(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Combine -R/-r filters with '~' negated terms and check which variables remain listed."""
+        pass_regex = [
+            r"ENVIRONMENT VARIABLE,[\s\S]*"
+            r"ROCPROFSYS_CI_SKIP_PUSH_POP_CHECK,[\s\S]*"
+            r"ROCPROFSYS_THREAD_POOL_SIZE,[\s\S]*"
+            r"ROCPROFSYS_USE_PID,"
+        ]
+        fail_regex = [r"ROCPROFSYS_TRACE"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=[
+                "-R",
+                "rocprofsys",
+                "~timemory",
+                "-r",
+                "_P",
+                "~PERFETTO",
+                "~PROCESS_SAMPLING",
+                "~KOKKOSP",
+                "~PAGE",
+                "--csv",
+                "--brief",
+                "--advanced",
+            ],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex, fail_regex=fail_regex)
+
+    def test_write_config(
+        self,
+        run_test,
+        test_output_dir,
+        assert_regex,
+        assert_file_exists,
+    ):
+        """Generate cfg/json/xml config files with -G/-F and check the log lines and the files."""
+        import re
+
+        config_base = test_output_dir / "rocprof-sys-test"
+
+        # The output directory is embedded in a regex below, so escape the
+        # WHOLE path: escaping only "+" leaves other metacharacters that can
+        # occur in a directory name (e.g. "[", "(", "$") active.
+        avail_cfg_path = re.escape(str(test_output_dir / "rocprof-sys-"))
+
+        pass_regex = [
+            rf"Outputting JSON configuration file '{avail_cfg_path}test\.json'"
+            r"[\s\S]*"
+            rf"Outputting XML configuration file '{avail_cfg_path}test\.xml'"
+            r"[\s\S]*"
+            rf"Outputting text configuration file '{avail_cfg_path}test\.cfg'"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=[
+                "-G",
+                str(config_base) + ".cfg",
+                "-F",
+                "txt",
+                "json",
+                "xml",
+                "--force",
+                "--all",
+                "-c",
+                "rocprofsys",
+            ],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+        config_files = [
+            test_output_dir / f"rocprof-sys-test.{ext}" for ext in ["cfg", "json", "xml"]
+        ]
+        assert_file_exists(config_files, subtest_name="Config file existence validation")
+
+    def test_write_config_tweak(
+        self,
+        run_test,
+        test_output_dir,
+        assert_regex,
+        assert_file_exists,
+    ):
+        """Generate config files while ROCPROFSYS_* environment overrides are set."""
+        import re
+
+        config_base = test_output_dir / "rocprof-sys-tweak"
+
+        # Environment passed to the run through run_test's env argument
+        env_overrides = {
+            "ROCPROFSYS_TRACE": "OFF",
+            "ROCPROFSYS_PROFILE": "ON",
+            "ROCPROFSYS_USE_SAMPLING": "OFF",
+            "ROCPROFSYS_TIME_OUTPUT": "OFF",
+        }
+
+        # The output directory is embedded in a regex below, so escape the
+        # WHOLE path: escaping only "+" leaves other metacharacters that can
+        # occur in a directory name (e.g. "[", "(", "$") active.
+        avail_cfg_path = re.escape(str(test_output_dir / "rocprof-sys-"))
+
+        pass_regex = [
+            rf"Outputting JSON configuration file '{avail_cfg_path}tweak\.json'"
+            r"[\s\S]*"
+            rf"Outputting XML configuration file '{avail_cfg_path}tweak\.xml'"
+            r"[\s\S]*"
+            rf"Outputting text configuration file '{avail_cfg_path}tweak\.cfg'"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=[
+                "-G",
+                str(config_base) + ".cfg",
+                "-F",
+                "txt",
+                "json",
+                "xml",
+                "--force",
+            ],
+            timeout=45,
+            fail_on_not_found=True,
+            env=env_overrides,
+        )
+        assert_regex(result, pass_regex=pass_regex)
+
+        config_files = [
+            test_output_dir / f"rocprof-sys-tweak.{ext}" for ext in ["cfg", "json", "xml"]
+        ]
+        assert_file_exists(config_files, subtest_name="Config file existence validation")
+
+    def test_list_keys(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Run --list-keys --expand-keys and expect %argv% followed by %argv_hash% under 'Output Keys:'."""
+        pass_regex = [r"Output Keys:[\s\S]*%argv%[\s\S]*%argv_hash%"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--list-keys", "--expand-keys"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_list_keys_markdown(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Same as test_list_keys but with --markdown, so the keys appear in backticks."""
+        pass_regex = [r"`%argv%`[\s\S]*`%argv_hash%`"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--list-keys", "--expand-keys", "--markdown"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_list_categories(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Run --list-categories and expect component::, hw_counters:: and settings:: in that order."""
+        pass_regex = [r" component::[\s\S]* hw_counters::[\s\S]* settings::"]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--list-categories"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+    def test_core_categories(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Run -c core and expect the listed core settings to appear in order."""
+        pass_regex = [
+            r"ROCPROFSYS_CONFIG_FILE[\s\S]*ROCPROFSYS_ENABLED[\s\S]*"
+            r"ROCPROFSYS_SUPPRESS_CONFIG[\s\S]*ROCPROFSYS_SUPPRESS_PARSING[\s\S]*ROCPROFSYS_VERBOSE"
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["-c", "core"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        assert_regex(result, pass_regex=pass_regex)
+
+
+# ============================================================================
+# rocprof-sys-run tests
+# ============================================================================
+
+
+class TestRunBinary:
+    """Tests for rocprof-sys-run binary."""
+
+    # Executable name handed to run_test as the target for every test here
+    target = "rocprof-sys-run"
+
+    def test_help(
+        self,
+        run_test,
+        assert_regex,
+    ):
+        """Test rocprof-sys-run --help output."""
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=["--help"],
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        # No pass/fail patterns given: only assert_regex's defaults apply
+        assert_regex(result)
+
+    def test_args(
+        self,
+        test_output_dir,
+        run_test,
+        assert_regex,
+    ):
+        """Test rocprof-sys-run with comprehensive arguments.
+
+        Passes a long list of rocprof-sys-run options and wraps ``sleep 5``
+        as the profiled command. The test is skipped when no ``sleep``
+        executable is found on PATH.
+        """
+        import shutil
+
+        # Check if sleep command exists
+        sleep_cmd = shutil.which("sleep")
+        if not sleep_cmd:
+            pytest.skip("sleep command not found")
+
+        # Create empty config file
+        config_dir = test_output_dir / "config"
+        config_dir.mkdir(parents=True, exist_ok=True)
+        empty_cfg = config_dir / "empty.cfg"
+        empty_cfg.write_text("#\n# empty config file\n#\n")
+
+        # Directory passed to --tmpdir; resolved to an absolute path first
+        tmpdir = test_output_dir / "tmpdir"
+        tmpdir = tmpdir.resolve()
+        tmpdir.mkdir(parents=True, exist_ok=True)
+
+        # Everything before "--" is an option for rocprof-sys-run itself;
+        # everything after it is the wrapped command.
+        args = [
+            "--monochrome",
+            "--debug=false",
+            "-v",
+            "1",
+            "-c",
+            str(empty_cfg),
+            "-o",
+            str(test_output_dir),
+            "run-args-output/",
+            "-TPHD",
+            "-S",
+            "cputime",
+            "realtime",
+            "--trace-wait=1.0e-12",
+            "--trace-duration=5.0",
+            "--wait=1.0",
+            "--duration=3.0",
+            "--trace-file=perfetto-run-args-trace.proto",
+            "--trace-buffer-size=100",
+            "--trace-fill-policy=ring_buffer",
+            "--profile-format",
+            "console",
+            "json",
+            "text",
+            "--process-freq",
+            "1000",
+            "--process-wait",
+            "0.0",
+            "--process-duration",
+            "10",
+            "--cpus",
+            "0-4",
+            "--gpus",
+            "0",
+            "-f",
+            "1000",
+            "--sampling-wait",
+            "1.0",
+            "--sampling-duration",
+            "10",
+            "-t",
+            "0-3",
+            "--sample-cputime",
+            "1000",
+            "1.0",
+            "0-3",
+            "--sample-realtime",
+            "10",
+            "0.5",
+            "0-3",
+            "-I",
+            "all",
+            "-E",
+            "mutex-locks",
+            "rw-locks",
+            "spin-locks",
+            "-C",
+            "perf::INSTRUCTIONS",
+            "--inlines",
+            "--hsa-interrupt",
+            "0",
+            "--use-causal=false",
+            "--use-kokkosp",
+            "--num-threads-hint=4",
+            "--sampling-allocator-size=32",
+            "--ci",
+            "--dl-verbose=3",
+            "--perfetto-annotations=off",
+            "--kokkosp-kernel-logger",
+            "--kokkosp-name-length-max=1024",
+            # NOTE(review): the double quotes are part of this argument's
+            # value as written here — confirm that is intended.
+            '--kokkosp-prefix="[kokkos]"',
+            "--tmpdir",
+            str(tmpdir),
+            "--perfetto-backend",
+            "inprocess",
+            "--use-pid",
+            "false",
+            "--time-output",
+            "off",
+            "--thread-pool-size",
+            "0",
+            "--timemory-components",
+            "wall_clock",
+            "cpu_clock",
+            "peak_rss",
+            "page_rss",
+            "--fork",
+            "--",
+            sleep_cmd,
+            "5",
+        ]
+
+        result = run_test(
+            "baseline",
+            target=self.target,
+            run_args=args,
+            timeout=45,
+            fail_on_not_found=True,
+        )
+
+        # No pass/fail patterns given: only assert_regex's defaults apply
+        assert_regex(result)
@@ -0,0 +1,108 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+General configuration file tests.
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+import shutil
+
+pytestmark = [pytest.mark.rocprof_config]
+
+
+# ============================================================================
+# Helper functions
+# ============================================================================
+
+
+def write_invalid_config_file(output_dir: Path) -> Path:
+    """Create ``invalid.cfg`` inside ``output_dir`` and return its path.
+
+    The file contains an empty ``ROCPROFSYS_CONFIG_FILE`` entry followed by a
+    ``FOOBAR = ON`` line.
+    """
+    destination = output_dir / "invalid.cfg"
+    destination.write_text("ROCPROFSYS_CONFIG_FILE =\nFOOBAR = ON\n")
+    return destination
+
+
+# =============================================================================
+# Config fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def config_target(rocprof_config) -> str:
+    """Pick the executable that the config tests run.
+
+    Returns ``"parallel-overhead"`` when ``rocprof_config`` can locate it;
+    otherwise falls back to the system ``ls`` command.
+    """
+    preferred = "parallel-overhead"
+    try:
+        rocprof_config.get_target_executable(preferred)
+    except FileNotFoundError:
+        # Example binary is unavailable: use ls (resolved path if found).
+        return shutil.which("ls") or "ls"
+    return preferred
+
+
+# =============================================================================
+# Configuration file tests
+# =============================================================================
+
+
+class TestConfig:
+    """Failure-path checks for ``ROCPROFSYS_CONFIG_FILE`` handling."""
+
+    def test_invalid_config(
+        self,
+        test_output_dir: Path,
+        config_target: str,
+        run_test,
+        assert_regex,
+    ):
+        """A config file with an unknown setting must make the run fail."""
+        # The invalid file is written into this test's output directory.
+        bad_config = write_invalid_config_file(test_output_dir)
+
+        outcome = run_test(
+            "runtime_instrument",
+            target=config_target,
+            env={"ROCPROFSYS_CONFIG_FILE": str(bad_config)},
+            timeout=400,  # In xdist, it can take much longer
+            fail_on_pass=True,  # the run is expected to fail
+        )
+
+        unknown_setting = r"Unknown setting 'FOOBAR' \(value = 'ON'\)"
+        assert_regex(
+            outcome,
+            pass_regex=[unknown_setting],
+            use_abort_fail_regex=False,
+        )
+
+    def test_missing_config(
+        self,
+        test_output_dir: Path,
+        config_target: str,
+        run_test,
+        assert_regex,
+    ):
+        """Pointing at a config file that does not exist must make the run fail."""
+        # This path is never created, so the file is absent at run time.
+        absent_config = test_output_dir / "missing.cfg"
+
+        outcome = run_test(
+            "runtime_instrument",
+            target=config_target,
+            env={"ROCPROFSYS_CONFIG_FILE": str(absent_config)},
+            timeout=120,
+            fail_on_pass=True,  # the run is expected to fail
+        )
+
+        read_error = r"Error reading configuration file"
+        assert_regex(
+            outcome,
+            pass_regex=[read_error],
+            use_abort_fail_regex=False,
+        )
@@ -0,0 +1,79 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for GPU connectivity
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+
+# =============================================================================
+# GPU connectivity fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def gpu_connect_env() -> dict[str, str]:
+    """Return the ``ROCPROFSYS_*`` settings used by the GPU connectivity tests."""
+    return dict(
+        ROCPROFSYS_TRACE="ON",
+        ROCPROFSYS_TRACE_LEGACY="ON",
+        ROCPROFSYS_ROCM_DOMAINS="hip_runtime_api",
+        ROCPROFSYS_AMD_SMI_METRICS="busy,temp,power,xgmi,pcie",
+        ROCPROFSYS_SAMPLING_CPUS="none",
+        ROCPROFSYS_USE_SAMPLING="OFF",
+        ROCPROFSYS_PROCESS_SAMPLING_FREQ="50",
+        ROCPROFSYS_CPU_FREQ_ENABLED="OFF",
+    )
+
+
+@pytest.fixture
+def gpu_connect_rules(validation_rules_dir: Path) -> list[Path]:
+    """Return the paths of the ``gpu-connect`` validation rule files."""
+    rule_files = ("validation-rules.json", "amd-smi-rules.json")
+    return [validation_rules_dir / "gpu-connect" / name for name in rule_files]
+
+
+# =============================================================================
+# GPU connectivity tests
+# =============================================================================
+
+
+@pytest.mark.gpu
+@pytest.mark.xgmi
+@pytest.mark.run_if_gpu_category("not apu or instinct")
+class TestGPUConnect:
+    """Profiles ``transferBench`` and checks the XGMI counters it produces."""
+
+    @pytest.mark.rocpd("gpu_connect_env")
+    def test_sys_run(
+        self,
+        run_test,
+        gpu_connect_env: dict[str, str],
+        gpu_connect_rules: list[Path],
+        assert_regex,
+        assert_perfetto,
+        assert_rocpd,
+    ):
+        """Run ``transferBench`` in ``sys_run`` mode and validate its output.
+
+        The test is skipped when the benchmark reports that it created no
+        valid transfers.
+        """
+        outcome = run_test(
+            "sys_run",
+            target="transferBench",
+            env=gpu_connect_env,
+            timeout=120,
+        )
+
+        # pytest.skip() raises, so the validation below is not reached when
+        # the benchmark had nothing to transfer.
+        if "Error: No valid transfers created" in outcome.test_output:
+            pytest.skip("No valid transfers created")
+
+        xgmi_counters = ["XGMI Read Data", "XGMI Write Data"]
+        assert_regex(outcome)
+        assert_perfetto(
+            outcome,
+            counter_names=xgmi_counters,
+        )
+        assert_rocpd(outcome, rules_files=gpu_connect_rules)
@@ -0,0 +1,96 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for HIP stream API
+"""
+
+from __future__ import annotations
+import pytest
+
+# =============================================================================
+# HIP stream tests
+# =============================================================================
+
+
+@pytest.mark.gpu
+@pytest.mark.rocm_min_version("7.0")
+@pytest.mark.group_by_queue
+class TestTransposeGroupByQueue:
+    """Runs ``transpose`` with ``ROCPROFSYS_ROCM_GROUP_BY_QUEUE`` set to ``YES``."""
+
+    def test_sampling(
+        self,
+        run_test,
+        base_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``transpose`` in ``sampling`` mode with queue grouping on."""
+        queue_env = {**base_env, "ROCPROFSYS_ROCM_GROUP_BY_QUEUE": "YES"}
+
+        outcome = run_test(
+            "sampling",
+            target="transpose",
+            env=queue_env,
+            timeout=120,
+        )
+        assert_regex(outcome)
+
+    def test_sys_run(
+        self,
+        run_test,
+        base_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``transpose`` in ``sys_run`` mode with queue grouping on."""
+        queue_env = {**base_env, "ROCPROFSYS_ROCM_GROUP_BY_QUEUE": "YES"}
+
+        outcome = run_test(
+            "sys_run",
+            target="transpose",
+            env=queue_env,
+            timeout=120,
+        )
+        assert_regex(outcome)
+
+
+@pytest.mark.gpu
+@pytest.mark.rocm_min_version("7.0")
+@pytest.mark.group_by_stream
+class TestTransposeGroupByStream:
+    """Runs ``transpose`` with ``ROCPROFSYS_ROCM_GROUP_BY_QUEUE`` set to ``NO``."""
+
+    def test_sampling(
+        self,
+        run_test,
+        base_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``transpose`` in ``sampling`` mode with queue grouping off."""
+        stream_env = {**base_env, "ROCPROFSYS_ROCM_GROUP_BY_QUEUE": "NO"}
+
+        outcome = run_test(
+            "sampling",
+            target="transpose",
+            env=stream_env,
+            timeout=120,
+        )
+        assert_regex(outcome)
+
+    def test_sys_run(
+        self,
+        run_test,
+        base_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``transpose`` in ``sys_run`` mode with queue grouping off."""
+        stream_env = {**base_env, "ROCPROFSYS_ROCM_GROUP_BY_QUEUE": "NO"}
+
+        outcome = run_test(
+            "sys_run",
+            target="transpose",
+            env=stream_env,
+            timeout=120,
+        )
+        assert_regex(outcome)
@@ -0,0 +1,118 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for the jpegdecode example.
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+
+pytestmark = [pytest.mark.gpu, pytest.mark.decode, pytest.mark.jpegdecode]
+
+from rocprofsys import (
+    GPUInfo,
+    RocprofsysConfig,
+)
+
+# =============================================================================
+# JPEG decode fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def jpeg_decode_env() -> dict[str, str]:
+    """Return the ``ROCPROFSYS_*`` settings used by the JPEG decode tests."""
+    return dict(
+        ROCPROFSYS_ROCM_DOMAINS="hip_runtime_api,kernel_dispatch,memory_copy,rocjpeg_api",
+        ROCPROFSYS_AMD_SMI_METRICS="busy,temp,power,jpeg_activity,mem_usage",
+        ROCPROFSYS_SAMPLING_CPUS="none",
+    )
+
+
+@pytest.fixture
+def jpeg_decode_rules(validation_rules_dir: Path) -> list[Path]:
+    """Return the default rules file plus the ``jpeg-decode`` rule files."""
+    rule_paths = [validation_rules_dir / "default-rules.json"]
+    for file_name in ("validation-rules.json", "sdk-metrics-rules.json"):
+        rule_paths.append(validation_rules_dir / "jpeg-decode" / file_name)
+    return rule_paths
+
+
+# =============================================================================
+# JPEG decode tests
+# =============================================================================
+
+
+class TestJPEGDecode:
+    """Profiling runs of the ``jpegdecode`` example."""
+
+    @staticmethod
+    def _decode_args(rocprof_config: RocprofsysConfig) -> list[str]:
+        """Build the ``-i <examples dir>/images -b 32`` argument list."""
+        images_dir = rocprof_config.rocprofsys_examples_dir / "images"
+        return ["-i", str(images_dir), "-b", "32"]
+
+    @pytest.mark.rocpd("jpeg_decode_env")
+    def test_sampling(
+        self,
+        run_test,
+        rocprof_config: RocprofsysConfig,
+        jpeg_decode_env: dict[str, str],
+        gpu_info: GPUInfo,
+        jpeg_decode_rules: list[Path],
+        assert_regex,
+        assert_perfetto,
+        assert_rocpd,
+    ):
+        """Run ``jpegdecode`` in ``sampling`` mode and validate its traces.
+
+        When ``gpu_info.categories`` contains ``"instinct"``, the amd-smi rule
+        file and the ``JPEG Activity`` counter are checked as well.
+        """
+        env = jpeg_decode_env.copy()
+        on_instinct = "instinct" in gpu_info.categories
+        if on_instinct:
+            smi_rules = (
+                rocprof_config.rocpd_validation_rules
+                / "jpeg-decode"
+                / "amd-smi-rules.json"
+            )
+            jpeg_decode_rules.append(smi_rules)
+
+        outcome = run_test(
+            "sampling",
+            target="jpegdecode",
+            env=env,
+            timeout=120,
+            run_args=self._decode_args(rocprof_config),
+            no_check_target_arch=True,
+        )
+
+        expected_counters = ["JPEG Activity"] if on_instinct else None
+        assert_regex(outcome)
+        assert_perfetto(
+            outcome,
+            categories=["rocm_rocjpeg_api"],
+            labels=["rocJpegCreate"],
+            counts=[1],
+            depths=[1],
+            counter_names=expected_counters,
+        )
+        assert_rocpd(outcome, rules_files=jpeg_decode_rules)
+
+    def test_sys_run(
+        self,
+        run_test,
+        rocprof_config: RocprofsysConfig,
+        jpeg_decode_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``jpegdecode`` in ``sys_run`` mode and check the output regexes."""
+        outcome = run_test(
+            "sys_run",
+            target="jpegdecode",
+            env=jpeg_decode_env,
+            timeout=120,
+            run_args=self._decode_args(rocprof_config),
+            no_check_target_arch=True,
+        )
+
+        assert_regex(outcome)
@@ -0,0 +1,529 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for OpenMP integration with rocprofiler-systems.
+
+This module tests OpenMP examples with various configurations:
+- OpenMP CG (Conjugate Gradient) with OMPT
+- OpenMP LU decomposition
+- OpenMP target offload (GPU)
+- OpenMP VV Host
+- OpenMP VV Offload (GPU)
+- Sampling duration tests
+
+Note: OMPT backend is unavailable and tests are skipped if no GPU is available.
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+
+# OpenMP is not traced when no GPU is available, even for the CPU-only tests
+pytestmark = [pytest.mark.gpu, pytest.mark.openmp]
+
+# ============================================================================
+# OpenMP Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+def ompt_env() -> dict[str, str]:
+    """Return the base profiler and OpenMP runtime settings for OMPT tests."""
+    return dict(
+        ROCPROFSYS_TRACE="ON",
+        ROCPROFSYS_PROFILE="ON",
+        ROCPROFSYS_TIME_OUTPUT="OFF",
+        ROCPROFSYS_USE_OMPT="ON",
+        ROCPROFSYS_TIMEMORY_COMPONENTS="wall_clock,trip_count,peak_rss",
+        OMP_PROC_BIND="spread",
+        OMP_PLACES="threads",
+        OMP_NUM_THREADS="2",
+    )
+
+
+@pytest.fixture
+def ompt_sampling_env(ompt_env: dict[str, str]) -> dict[str, str]:
+    """Return ``ompt_env`` overlaid with the sampling-duration settings.
+
+    ``ompt_env`` itself is left unmodified; a new dict is returned.
+    """
+    return {
+        **ompt_env,
+        "ROCPROFSYS_VERBOSE": "2",
+        "ROCPROFSYS_USE_OMPT": "OFF",
+        "ROCPROFSYS_USE_SAMPLING": "ON",
+        "ROCPROFSYS_USE_PROCESS_SAMPLING": "OFF",
+        "ROCPROFSYS_SAMPLING_FREQ": "100",
+        "ROCPROFSYS_SAMPLING_DELAY": "0.1",
+        "ROCPROFSYS_SAMPLING_DURATION": "0.25",
+        "ROCPROFSYS_SAMPLING_CPUTIME": "ON",
+        "ROCPROFSYS_SAMPLING_REALTIME": "ON",
+        "ROCPROFSYS_SAMPLING_CPUTIME_FREQ": "1000",
+        "ROCPROFSYS_SAMPLING_REALTIME_FREQ": "500",
+        "ROCPROFSYS_MONOCHROME": "ON",
+    }
+
+
+@pytest.fixture
+def openmp_target_env(ompt_env: dict[str, str]) -> dict[str, str]:
+    """Return ``ompt_env`` plus the ROCm domains used by the GPU target tests."""
+    return {
+        **ompt_env,
+        "ROCPROFSYS_ROCM_DOMAINS": "hip_api,hsa_api,kernel_dispatch",
+    }
+
+
+@pytest.fixture
+def ompt_no_tmp_env(ompt_env: dict[str, str]) -> dict[str, str]:
+    """Return ``ompt_env`` overlaid with the no-temporary-files settings.
+
+    ``ompt_env`` itself is left unmodified; a new dict is returned.
+    """
+    overrides = {
+        "ROCPROFSYS_VERBOSE": "2",
+        "ROCPROFSYS_USE_OMPT": "OFF",
+        "ROCPROFSYS_USE_SAMPLING": "ON",
+        "ROCPROFSYS_USE_PROCESS_SAMPLING": "OFF",
+        "ROCPROFSYS_SAMPLING_CPUTIME": "ON",
+        "ROCPROFSYS_SAMPLING_REALTIME": "OFF",
+        "ROCPROFSYS_SAMPLING_CPUTIME_FREQ": "700",
+        "ROCPROFSYS_USE_TEMPORARY_FILES": "OFF",
+        "ROCPROFSYS_MONOCHROME": "ON",
+    }
+    return {**ompt_env, **overrides}
+
+
+@pytest.fixture
+def openmp_target_rules(validation_rules_dir: Path) -> list[Path]:
+    """Return the paths of the ``openmp-target`` validation rule files."""
+    rule_files = ("kernel-rules.json", "sdk-metrics-rules.json")
+    return [validation_rules_dir / "openmp-target" / name for name in rule_files]
+
+
+# ============================================================================
+# Test Class: OpenMP CG Tests
+# ============================================================================
+
+
+class TestOpenMPCG:
+    """Tests for the OpenMP Conjugate Gradient (``openmp-cg``) example."""
+
+    REWRITE_ARGS = ["-e", "-v", "2", "--instrument-loops"]
+
+    @staticmethod
+    def _cg_env(ompt_env: dict[str, str]) -> dict[str, str]:
+        """Copy ``ompt_env`` with USE_SAMPLING=OFF and COUT_OUTPUT=ON added."""
+        return {
+            **ompt_env,
+            "ROCPROFSYS_USE_SAMPLING": "OFF",
+            "ROCPROFSYS_COUT_OUTPUT": "ON",
+        }
+
+    def test_sampling(
+        self,
+        ompt_env: dict[str, str],
+        run_test,
+        assert_regex,
+    ):
+        """Run ``openmp-cg`` in ``sampling`` mode and check the output regexes."""
+        outcome = run_test(
+            "sampling",
+            target="openmp-cg",
+            env=self._cg_env(ompt_env),
+            timeout=180,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome)
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``openmp-cg`` in ``binary_rewrite`` mode with ``REWRITE_ARGS``."""
+        outcome = run_test(
+            "binary_rewrite",
+            target="openmp-cg",
+            rewrite_args=self.REWRITE_ARGS,
+            env=self._cg_env(ompt_env),
+            timeout=180,
+            no_check_target_arch=True,
+        )
+
+        assert_regex(outcome)
+
+
+# ============================================================================
+# Test Class: OpenMP LU Tests
+# ============================================================================
+
+
+class TestOpenMPLU:
+    """Tests for the OpenMP LU decomposition (``openmp-lu``) example."""
+
+    REWRITE_ARGS = ["-e", "-v", "2", "--instrument-loops"]
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run ``openmp-lu`` in ``binary_rewrite`` mode with sampling enabled."""
+        lu_env = {
+            **ompt_env,
+            "ROCPROFSYS_USE_SAMPLING": "ON",
+            "ROCPROFSYS_SAMPLING_FREQ": "50",
+            "ROCPROFSYS_COUT_OUTPUT": "ON",
+        }
+
+        outcome = run_test(
+            "binary_rewrite",
+            target="openmp-lu",
+            rewrite_args=self.REWRITE_ARGS,
+            env=lu_env,
+            timeout=180,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome)
+
+
+# ============================================================================
+# Test Class: OpenMP Target (GPU) Tests
+# ============================================================================
+
+
+@pytest.mark.openmp_target
+class TestOpenMPTarget:
+    """Tests for the ``openmp-target`` (GPU offload) example."""
+
+    @pytest.mark.rocpd("openmp_target_env")
+    def test_sampling(
+        self,
+        run_test,
+        openmp_target_env: dict[str, str],
+        openmp_target_rules: list[Path],
+        assert_regex,
+        assert_perfetto,
+        assert_rocpd,
+    ):
+        """Run ``openmp-target`` in ``sampling`` mode and validate its traces."""
+        outcome = run_test(
+            "sampling",
+            target="openmp-target",
+            env=openmp_target_env,
+            timeout=300,
+            no_check_target_arch=True,
+        )
+
+        assert_regex(outcome)
+        assert_rocpd(outcome, rules_files=openmp_target_rules)
+
+        # Label substrings of the expected kernels; depths and counts below
+        # carry one entry per substring.
+        kernel_labels = [
+            "Z4vmulIiEvPT_S1_S1_i_l51.kd",
+            "Z4vmulIfEvPT_S1_S1_i_l51.kd",
+            "Z4vmulIdEvPT_S1_S1_i_l51.kd",
+        ]
+        assert_perfetto(
+            outcome,
+            subtest_name="Perfetto Kernel Dispatch Validation",
+            categories=["rocm_kernel_dispatch"],
+            label_substrings=kernel_labels,
+            depths=[0] * len(kernel_labels),
+            counts=[4] * len(kernel_labels),
+        )
+
+
+# ============================================================================
+# Test Class: OpenMP-VV Host Tests
+# ============================================================================
+
+
+@pytest.mark.parametrize(
+    "target_name",
+    [
+        "openmp-vv-host-test-parallel-for-simd-atomic",
+        "openmp-vv-host-test-team-default-shared",
+    ],
+    ids=["parallel-for-simd-atomic", "team-default-shared"],
+)
+@pytest.mark.ompvv
+class TestOpenMPVVHost:
+    """Tests for OpenMP VV host programs, run once per ``target_name``."""
+
+    def test_baseline(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+    ):
+        """Run the target in ``baseline`` mode and check the output regexes."""
+        outcome = run_test(
+            "baseline",
+            target=target_name,
+            env=ompt_env,
+            timeout=180,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome)
+
+    def test_sampling(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+        assert_perfetto,
+    ):
+        """Run the target in ``sampling`` mode; check regexes and Perfetto."""
+        outcome = run_test(
+            "sampling",
+            target=target_name,
+            env=ompt_env,
+            timeout=180,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome)
+        assert_perfetto(outcome)
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+    ):
+        """Run in ``binary_rewrite`` mode and expect ``omp_parallel`` output."""
+        rewrite_env = {**ompt_env, "ROCPROFSYS_COUT_OUTPUT": "ON"}
+
+        outcome = run_test(
+            "binary_rewrite",
+            target=target_name,
+            rewrite_args=["-e", "-v", "2", "--instrument-loops"],
+            env=rewrite_env,
+            timeout=180,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome, pass_regex=[r"omp_parallel"])
+
+    def test_runtime_instrument(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+    ):
+        """Run in ``runtime_instrument`` mode and expect ``omp_parallel`` output."""
+        instrument_env = {
+            **ompt_env,
+            "ROCPROFSYS_COUT_OUTPUT": "ON",
+            "ROCPROFSYS_CI_SKIP_PUSH_POP_CHECK": "ON",
+        }
+
+        # NOTE(review): unlike the sibling tests, no explicit timeout is passed
+        # here -- confirm that run_test's default is sufficient.
+        outcome = run_test(
+            "runtime_instrument",
+            target=target_name,
+            instrument_args=["-e", "-v", "1", "--label", "return", "args"],
+            env=instrument_env,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome, pass_regex=[r"omp_parallel"])
+
+    def test_sys_run(
+        self,
+        run_test,
+        ompt_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+        assert_perfetto,
+    ):
+        """Run the target in ``sys_run`` mode; check regexes and Perfetto."""
+        outcome = run_test(
+            "sys_run",
+            target=target_name,
+            env=ompt_env,
+            timeout=180,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome)
+        assert_perfetto(outcome)
+
+
+# ============================================================================
+# Test Class: OpenMP-VV Offload (GPU) Tests
+# ============================================================================
+
+
+@pytest.mark.parametrize(
+    "target_name",
+    [
+        "openmp-vv-offload-test-target-simd-if",
+        "openmp-vv-offload-test-target-teams-distribute-parallel-for-collapse",
+    ],
+    ids=["target-simd-if", "target-teams-distribute-parallel-for-collapse"],
+)
+@pytest.mark.openmp_target
+@pytest.mark.ompvv
+class TestOpenMPVVOffload:
+    """Tests for OpenMP VV offload programs.
+
+    Every test runs once per parametrized ``target_name`` using the
+    ``openmp_target_env`` environment and ``timeout=300``.
+    """
+
+    def test_baseline(
+        self,
+        run_test,
+        openmp_target_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+    ):
+        """Run the target in ``baseline`` mode and check the output regexes."""
+        result = run_test(
+            "baseline",
+            target=target_name,
+            env=openmp_target_env,
+            timeout=300,
+        )
+
+        assert_regex(result)
+
+    def test_sampling(
+        self,
+        run_test,
+        openmp_target_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+    ):
+        """Run the target in ``sampling`` mode and check the output regexes."""
+        result = run_test(
+            "sampling",
+            target=target_name,
+            env=openmp_target_env,
+            timeout=300,
+        )
+
+        assert_regex(result)
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        openmp_target_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+    ):
+        """Run in ``binary_rewrite`` mode and expect ``omp_offloading`` output."""
+        env = openmp_target_env.copy()
+        env["ROCPROFSYS_COUT_OUTPUT"] = "ON"
+
+        result = run_test(
+            "binary_rewrite",
+            target=target_name,
+            rewrite_args=["-e", "-v", "2"],
+            env=env,
+            timeout=300,
+        )
+
+        assert_regex(result, pass_regex=[r"omp_offloading"])
+
+    def test_sys_run(
+        self,
+        run_test,
+        openmp_target_env: dict[str, str],
+        target_name: str,
+        assert_regex,
+        assert_perfetto,
+    ):
+        """Run the target in ``sys_run`` mode; check regexes and Perfetto."""
+        # No run_args: the instrumentation flags that used to be passed here
+        # ("-e -v 1 --label return args") are not arguments of the target
+        # program; elsewhere in this suite they are supplied as instrument_args
+        # to runtime_instrument runs.
+        result = run_test(
+            "sys_run",
+            target=target_name,
+            env=openmp_target_env,
+            timeout=300,
+        )
+
+        assert_regex(result)
+        assert_perfetto(result)
+
+
+# ============================================================================
+# Test Class: Sampling Duration Tests
+# ============================================================================
+
+
+@pytest.mark.sampling_duration
+class TestSamplingDuration:
+    """Checks that sampling honours the configured duration limit."""
+
+    # Regex patterns from CMake _ompt_sampling_samp_regex and _ompt_sampling_file_regex
+    SAMPLING_PASS_REGEX = [
+        r"Sampler for thread 0 will be triggered 1000\.0x per second of CPU-time",
+        r"Sampler for thread 0 will be triggered 500\.0x per second of wall-time",
+        r"Sampling will be disabled after 0\.250000 seconds",
+        r"Sampling duration of 0\.250000 seconds has elapsed\. Shutting down sampling",
+        r"sampling_percent\.(json|txt)",
+        r"sampling_cpu_clock\.(json|txt)",
+        r"sampling_wall_clock\.(json|txt)",
+    ]
+
+    def _run_and_check(self, run_test, assert_regex, env, target):
+        """Sample ``target`` with ``env`` and check ``SAMPLING_PASS_REGEX``."""
+        outcome = run_test(
+            "sampling",
+            target=target,
+            env=env,
+            timeout=300,
+            no_check_target_arch=True,
+        )
+        assert_regex(outcome, pass_regex=self.SAMPLING_PASS_REGEX)
+
+    def test_cg_sampling_duration(
+        self,
+        ompt_sampling_env: dict[str, str],
+        run_test,
+        assert_regex,
+    ):
+        """Test OpenMP CG with sampling duration limits."""
+        self._run_and_check(run_test, assert_regex, ompt_sampling_env, "openmp-cg")
+
+    def test_lu_sampling_duration(
+        self,
+        run_test,
+        ompt_sampling_env: dict[str, str],
+        assert_regex,
+    ):
+        """Test OpenMP LU with sampling duration limits."""
+        self._run_and_check(run_test, assert_regex, ompt_sampling_env, "openmp-lu")
+
+
+# ============================================================================
+# Test Class: No Temporary Files Tests
+# ============================================================================
+
+
+@pytest.mark.no_tmp_files
+class TestNoTmpFiles:
+    """Sampling runs with ``ROCPROFSYS_USE_TEMPORARY_FILES`` turned off."""
+
+    NOTMP_SAMPLING_FILE_REGEX = [
+        r"sampling_percent\.(json|txt)",
+        r"sampling_cpu_clock\.(json|txt)",
+        r"sampling_wall_clock\.(json|txt)",
+    ]
+
+    def test_cg_no_tmp_files(
+        self,
+        run_test,
+        ompt_no_tmp_env: dict[str, str],
+        assert_regex,
+        assert_perfetto,
+        assert_file_exists,
+    ):
+        """Sample ``openmp-cg`` and verify the sampling output files exist."""
+        outcome = run_test(
+            "sampling",
+            target="openmp-cg",
+            env=ompt_no_tmp_env,
+            timeout=300,
+            no_check_target_arch=True,
+        )
+
+        assert_regex(outcome, pass_regex=self.NOTMP_SAMPLING_FILE_REGEX)
+        assert_perfetto(outcome)
+
+        # Collect JSON matches first, then text matches, from the output dir.
+        patterns = ("sampling_*.json", "sampling_*.txt")
+        sampling_files = [
+            found for pattern in patterns for found in outcome.output_dir.glob(pattern)
+        ]
+        assert_file_exists(sampling_files)
@@ -0,0 +1,187 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for RCCL
+
+MPI is unsupported for RCCL tests.
+"""
+
+from __future__ import annotations
+import pytest
+
+pytestmark = [pytest.mark.rccl, pytest.mark.disable("all")]
+
+# =============================================================================
+# RCCL fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def rccl_env() -> dict[str, str]:
+    """Return the profiler and OpenMP runtime settings used by the RCCL tests."""
+    return dict(
+        ROCPROFSYS_TRACE_LEGACY="OFF",
+        ROCPROFSYS_TRACE_CACHED="ON",
+        ROCPROFSYS_PROFILE="ON",
+        ROCPROFSYS_USE_SAMPLING="OFF",
+        ROCPROFSYS_USE_PROCESS_SAMPLING="ON",
+        ROCPROFSYS_TIME_OUTPUT="OFF",
+        ROCPROFSYS_USE_PID="OFF",
+        ROCPROFSYS_USE_RCCLP="ON",
+        ROCPROFSYS_ROCM_DOMAINS="hip_runtime_api,kernel_dispatch,memory_copy",
+        OMP_PROC_BIND="spread",
+        OMP_PLACES="threads",
+        OMP_NUM_THREADS="2",
+    )
+
+
+# =============================================================================
+# RCCL tests
+# =============================================================================
+
+
+# RCCL test binaries. Each name becomes one parametrized case of TestRCCL;
+# the test ids are these names with "_" replaced by "-".
+RCCL_TARGETS = [
+    "all_reduce_perf",
+    "all_gather_perf",
+    "broadcast_perf",
+    "reduce_scatter_perf",
+    "reduce_perf",
+    "alltoall_perf",
+    "scatter_perf",
+    "gather_perf",
+    "sendrecv_perf",
+    "alltoallv_perf",
+]
+
+
+@pytest.mark.parametrize(
+    "rccl_target",
+    RCCL_TARGETS,
+    ids=[t.replace("_", "-") for t in RCCL_TARGETS],
+)
+@pytest.mark.gpu
+class TestRCCL:
+    """Profiling tests run once per RCCL test binary in ``RCCL_TARGETS``."""
+
+    # "--label" options shared by the rewrite and runtime argument lists.
+    _LABEL_ARGS = ["--label", "file", "line", "return", "args"]
+
+    # Passed as rewrite_args in binary_rewrite mode.
+    REWRITE_ARGS = ["-e", "-v", "2", "-i", "8", *_LABEL_ARGS]
+
+    # Passed as instrument_args in runtime_instrument mode.
+    RUNTIME_ARGS = (
+        ["-e", "-v", "1", "-i", "8"]
+        + _LABEL_ARGS
+        + ["-ME", "sysdeps", "--log-file", "rccl-test.log"]
+    )
+
+    # Passed as run_args in every mode.
+    RUN_ARGS = (
+        ["-t", "1", "-g", "1", "-i", "10", "-w", "2"]
+        + ["-m", "2", "-p", "-c", "1", "-z", "-s", "1"]
+    )
+
+    def test_sampling(
+        self,
+        rccl_target: str,
+        run_test,
+        rccl_env: dict[str, str],
+        assert_regex,
+        assert_perfetto,
+    ):
+        """Run in ``sampling`` mode; check regexes and the RCCL Perfetto data."""
+        outcome = run_test(
+            "sampling",
+            target=rccl_target,
+            env=rccl_env,
+            run_args=self.RUN_ARGS,
+            timeout=300,
+        )
+        assert_regex(outcome)
+        assert_perfetto(
+            outcome,
+            categories=["rocm_rccl_api"],
+            counter_names=["RCCL Comm"],
+        )
+
+    def test_binary_rewrite(
+        self,
+        rccl_target: str,
+        run_test,
+        rccl_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run in ``binary_rewrite`` mode with ``REWRITE_ARGS``."""
+        outcome = run_test(
+            "binary_rewrite",
+            target=rccl_target,
+            env=rccl_env,
+            run_args=self.RUN_ARGS,
+            rewrite_args=self.REWRITE_ARGS,
+            timeout=300,
+        )
+        assert_regex(outcome)
+
+    @pytest.mark.slow
+    def test_runtime_instrument(
+        self,
+        rccl_target: str,
+        run_test,
+        rccl_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run in ``runtime_instrument`` mode with ``RUNTIME_ARGS``."""
+        outcome = run_test(
+            "runtime_instrument",
+            target=rccl_target,
+            env=rccl_env,
+            run_args=self.RUN_ARGS,
+            instrument_args=self.RUNTIME_ARGS,
+            timeout=300,
+        )
+        assert_regex(outcome)
+
+    def test_sys_run(
+        self,
+        rccl_target: str,
+        run_test,
+        rccl_env: dict[str, str],
+        assert_regex,
+    ):
+        """Run in ``sys_run`` mode and check the output regexes."""
+        outcome = run_test(
+            "sys_run",
+            target=rccl_target,
+            env=rccl_env,
+            run_args=self.RUN_ARGS,
+            timeout=300,
+        )
+        assert_regex(outcome)
@@ -0,0 +1,164 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for the rocTX marker API integration with rocprofiler-systems.
+Equivalent to rocprof-sys-roctx-tests.cmake
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+
+pytestmark = [pytest.mark.gpu, pytest.mark.roctx]
+
+# =============================================================================
+# rocTX fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def roctx_env() -> dict[str, str]:
+    """Environment variables for rocTX tests."""
+    return {
+        "ROCPROFSYS_TRACE_LEGACY": "ON",
+        "ROCPROFSYS_ROCM_DOMAINS": "hip_runtime_api,marker_api,kernel_dispatch",
+    }
+
+
+@pytest.fixture
+def roctx_rules(validation_rules_dir: Path) -> list[Path]:
+    """Get validation rules for rocTX tests."""
+    rules_dir = validation_rules_dir / "roctx"
+    return [
+        rules_dir / "validation-rules.json",
+        rules_dir / "amd-smi-rules.json",
+        rules_dir / "sdk-metrics-rules.json",
+    ]
+
+
+# ============================================================================
+# Test Class: rocTX Tests
+# ============================================================================
+
+
+class TestRoctx:
+    """Tests for rocTX marker API."""
+
+    def roctx_legacy_labels(self) -> list[str]:
+        return [
+            "roctxMark_GPU_workload",
+            "roctxRangePush_run_profiling",
+            "roctxRangeStart_GPU_Compute",
+            "roctxRangeStart_GPU_Compute",
+            "roctxRangePush_HIP_Kernel",
+            "roctxRangePush_HIP_Kernel",
+            "roctxGetThreadId",
+            "roctxMark_RoctxProfilerPause_End",
+            "roctxMark_Thread_Start",
+            "roctxMark_End",
+            "roctxMark_Finished_GPU",
+        ]
+
+    def roctx_legacy_count(self) -> list[int]:
+        return [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+
+    def roctx_legacy_depth(self) -> list[int]:
+        return [1, 1, 2, 0, 3, 1, 2, 2, 0, 0, 1]
+
+    def roctx_cached_labels(self) -> list[str]:
+        return [
+            "roctxMark_GPU_workload",
+            "roctxRangePush_HIP_Kernel",
+            "roctxRangeStart_GPU_Compute",
+            "roctxGetThreadId",
+            "roctxMark_RoctxProfilerPause_End",
+            "roctxMark_Thread_Start",
+            "roctxMark_End",
+            "roctxRangePush_run_profiling",
+            "roctxMark_Finished_GPU",
+        ]
+
+    def roctx_cached_count(self) -> list[int]:
+        return [1, 2, 2, 1, 1, 1, 1, 1, 1]
+
+    def roctx_cached_depth(self) -> list[int]:
+        return [1, 1, 1, 1, 1, 2, 1, 1, 1]
+
+    REWRITE_ARGS = ["-e", "-v", "2", "--instrument-loops"]
+
+    def test_baseline(
+        self,
+        roctx_env: dict[str, str],
+        run_test,
+        assert_regex,
+    ):
+        result = run_test("baseline", target="roctx", env=roctx_env, timeout=120)
+        assert_regex(result)
+
+    @pytest.mark.disable("assert_rocpd")
+    @pytest.mark.rocpd("roctx_env")
+    def test_sampling(
+        self,
+        run_test,
+        roctx_env: dict[str, str],
+        roctx_rules: list[Path],
+        assert_regex,
+        assert_perfetto,
+        assert_rocpd,
+    ):
+        env = roctx_env.copy()
+        categories = ["rocm_marker_api"]
+        if env["ROCPROFSYS_TRACE_LEGACY"] == "ON":
+            labels = self.roctx_legacy_labels()
+            counts = self.roctx_legacy_count()
+            depths = self.roctx_legacy_depth()
+        else:
+            labels = self.roctx_cached_labels()
+            counts = self.roctx_cached_count()
+            depths = self.roctx_cached_depth()
+
+        result = run_test("sampling", target="roctx", env=env, timeout=120)
+
+        assert_regex(result)
+        assert_perfetto(
+            result,
+            subtest_name="Perfetto counter validation",
+            categories=categories,
+            labels=labels,
+            counts=counts,
+            depths=depths,
+        )
+        assert_rocpd(
+            result,
+            rules_files=roctx_rules,
+        )
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        roctx_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "binary_rewrite",
+            target="roctx",
+            rewrite_args=self.REWRITE_ARGS,
+            env=roctx_env,
+            timeout=120,
+        )
+        assert_regex(result)
+
+    def test_sys_run(
+        self,
+        run_test,
+        roctx_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "sys_run",
+            target="roctx",
+            env=roctx_env,
+            timeout=120,
+        )
+        assert_regex(result)
@@ -0,0 +1,205 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for the trace time window example.
+Equivalent to rocprof-sys-time-window-tests.cmake
+"""
+
+from __future__ import annotations
+import pytest
+
+pytestmark = [pytest.mark.time_window]
+
+# ============================================================================
+# Time Window Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+def time_window_env() -> dict[str, str]:
+    """Environment variables for time window tests."""
+    return {
+        "ROCPROFSYS_USE_SAMPLING": "OFF",
+        "ROCPROFSYS_USE_PROCESS_SAMPLING": "OFF",
+        "ROCPROFSYS_VERBOSE": "2",
+    }
+
+
+# ============================================================================
+# Test Class: Trace Time Window Tests
+# ============================================================================
+
+
+class TestTraceTimeWindow:
+    """Tests for the trace-time-window target with only a trace duration set.
+
+    Both tests set ``ROCPROFSYS_TRACE_DURATION`` to ``1.25`` and expect the
+    ``outer_a``, ``outer_b`` and ``outer_c`` labels while ``outer_d`` must not match.
+    """
+
+    # Arguments forwarded as ``rewrite_args`` / ``instrument_args`` respectively;
+    # the two lists differ only in the value following ``-v``.
+    REWRITE_ARGS = ["-e", "-v", "2", "--caller-include", "inner", "-i", "4096"]
+    RUNTIME_ARGS = ["-e", "-v", "1", "--caller-include", "inner", "-i", "4096"]
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        time_window_env: dict[str, str],
+        assert_perfetto,
+        assert_timemory,
+        assert_regex,
+    ):
+        """Run "binary_rewrite" mode and apply the regex, timemory and Perfetto checks."""
+        # Copy first so the dict handed out by the fixture is left untouched.
+        env = time_window_env.copy()
+        env.update({"ROCPROFSYS_TRACE_DURATION": "1.25"})
+
+        result = run_test(
+            "binary_rewrite",
+            target="trace-time-window",
+            rewrite_args=self.REWRITE_ARGS,
+            env=env,
+            timeout=120,
+        )
+
+        assert_regex(result)
+        # The top-level label here is "trace-time-window.inst", whereas the
+        # runtime-instrument test expects "trace-time-window".
+        assert_timemory(
+            result,
+            file_name="wall_clock.json",
+            metric="wall_clock",
+            labels=["trace-time-window.inst", "outer_a", "outer_b", "outer_c"],
+            counts=[1, 1, 1, 1],
+            depths=[0, 1, 1, 1],
+            fail_regex=["outer_d"],  # time window should exclude this
+        )
+        # NOTE(review): the other assert_perfetto calls in this module pass
+        # categories=["host"]; this one does not — confirm that is intentional.
+        assert_perfetto(
+            result,
+            labels=["trace-time-window.inst", "outer_a", "outer_b", "outer_c"],
+            counts=[1, 1, 1, 1],
+            depths=[0, 1, 1, 1],
+            fail_regex=["outer_d"],  # time window should exclude this
+        )
+
+    def test_runtime_instrument(
+        self,
+        run_test,
+        time_window_env: dict[str, str],
+        assert_regex,
+        assert_perfetto,
+        assert_timemory,
+    ):
+        """Run "runtime_instrument" mode and apply the regex, timemory and Perfetto checks."""
+        # Copy first so the dict handed out by the fixture is left untouched.
+        env = time_window_env.copy()
+        env.update({"ROCPROFSYS_TRACE_DURATION": "1.25"})
+
+        result = run_test(
+            "runtime_instrument",
+            target="trace-time-window",
+            instrument_args=self.RUNTIME_ARGS,
+            env=env,
+            timeout=400,  # In xdist, it can take much longer
+        )
+
+        assert_regex(result)
+        assert_timemory(
+            result,
+            file_name="wall_clock.json",
+            metric="wall_clock",
+            labels=["trace-time-window", "outer_a", "outer_b", "outer_c"],
+            counts=[1, 1, 1, 1],
+            depths=[0, 1, 1, 1],
+            fail_regex=["outer_d"],  # time window should exclude this
+        )
+        assert_perfetto(
+            result,
+            categories=["host"],
+            labels=["trace-time-window", "outer_a", "outer_b", "outer_c"],
+            counts=[1, 1, 1, 1],
+            depths=[0, 1, 1, 1],
+            fail_regex=["outer_d"],  # time window should exclude this
+        )
+
+
+# ============================================================================
+# Test Class: Trace Time Window Delay Tests
+# ============================================================================
+
+
+class TestTraceTimeWindowDelay:
+    """Tests for trace time window with delay.
+
+    Both tests set ``ROCPROFSYS_TRACE_DELAY`` and ``ROCPROFSYS_TRACE_DURATION`` to
+    ``0.75`` and expect exactly the ``outer_c`` and ``outer_d`` labels at depth 0.
+    """
+
+    # Arguments forwarded as ``rewrite_args`` / ``instrument_args`` respectively;
+    # the two lists differ only in the value following ``-v``.
+    REWRITE_ARGS = ["-e", "-v", "2", "--caller-include", "inner", "-i", "4096"]
+    RUNTIME_ARGS = ["-e", "-v", "1", "--caller-include", "inner", "-i", "4096"]
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        time_window_env: dict[str, str],
+        assert_perfetto,
+        assert_timemory,
+        assert_regex,
+    ):
+        """Run "binary_rewrite" mode with a delayed window and apply all three checks."""
+        # Copy first so the dict handed out by the fixture is left untouched.
+        env = time_window_env.copy()
+        env.update(
+            {
+                "ROCPROFSYS_TRACE_DELAY": "0.75",
+                "ROCPROFSYS_TRACE_DURATION": "0.75",
+            }
+        )
+        result = run_test(
+            "binary_rewrite",
+            target="trace-time-window",
+            rewrite_args=self.REWRITE_ARGS,
+            env=env,
+            timeout=120,
+        )
+
+        assert_regex(result)
+        assert_timemory(
+            result,
+            file_name="wall_clock.json",
+            metric="wall_clock",
+            labels=["outer_c", "outer_d"],
+            counts=[1, 1],
+            depths=[0, 0],
+        )
+        assert_perfetto(
+            result,
+            categories=["host"],
+            labels=["outer_c", "outer_d"],
+            counts=[1, 1],
+            depths=[0, 0],
+        )
+
+    def test_runtime_instrument(
+        self,
+        run_test,
+        time_window_env: dict[str, str],
+        assert_perfetto,
+        assert_timemory,
+        assert_regex,
+    ):
+        """Test trace time window delay with runtime instrumentation."""
+        # Copy first so the dict handed out by the fixture is left untouched.
+        env = time_window_env.copy()
+        env.update(
+            {
+                "ROCPROFSYS_TRACE_DELAY": "0.75",
+                "ROCPROFSYS_TRACE_DURATION": "0.75",
+            }
+        )
+
+        # NOTE(review): no explicit timeout is passed here, while
+        # TestTraceTimeWindow.test_runtime_instrument uses timeout=400 — confirm
+        # that run_test's default is long enough for this mode.
+        result = run_test(
+            "runtime_instrument",
+            target="trace-time-window",
+            instrument_args=self.RUNTIME_ARGS,
+            env=env,
+        )
+
+        assert_regex(result)
+        assert_timemory(
+            result,
+            file_name="wall_clock.json",
+            metric="wall_clock",
+            labels=["outer_c", "outer_d"],
+            counts=[1, 1],
+            depths=[0, 0],
+        )
+        assert_perfetto(
+            result,
+            categories=["host"],
+            labels=["outer_c", "outer_d"],
+            counts=[1, 1],
+            depths=[0, 0],
+        )
@@ -0,0 +1,407 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for the transpose example.
+Equivalent to rocprof-sys-rocm-tests.cmake
+    Note: MPI is not yet supported
+
+This module tests the transpose HIP example with various instrumentation modes:
+- Baseline execution (no instrumentation)
+- Sampling instrumentation
+- Binary rewrite instrumentation
+- Runtime instrumentation
+- sys-run wrapper execution
+
+It also validates outputs including:
+- Perfetto traces
+- ROCpd databases
+- ROCProfiler counter data
+"""
+
+from __future__ import annotations
+import pytest
+from pathlib import Path
+
+pytestmark = [pytest.mark.transpose, pytest.mark.gpu]
+
+from rocprofsys import (
+    GPUInfo,
+)
+
+# =============================================================================
+# Transpose fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def transpose_env() -> dict[str, str]:
+    """Environment variables for transpose tests."""
+    return {
+        "ROCPROFSYS_ROCM_DOMAINS": "hip_runtime_api,kernel_dispatch,memory_copy,memory_allocation,hsa_api"
+    }
+
+
+@pytest.fixture
+def rocprofiler_env(transpose_env: dict[str, str], gpu_info: GPUInfo) -> dict[str, str]:
+    """Environment with ROCm events configured."""
+    env = transpose_env.copy()
+    env["ROCPROFSYS_ROCM_EVENTS"] = gpu_info.rocm_events_for_test
+    return env
+
+
+@pytest.fixture
+def transpose_rules(validation_rules_dir: Path) -> list[Path]:
+    """Get validation rules files for transpose tests."""
+    rules_dir = validation_rules_dir / "transpose"
+    return [
+        validation_rules_dir / "default-rules.json",
+        rules_dir / "validation-rules.json",
+        rules_dir / "amd-smi-rules.json",
+        rules_dir / "cpu-metrics-rules.json",
+        rules_dir / "timer-sampling-rules.json",
+        rules_dir / "sdk-metrics-rules.json",
+    ]
+
+
+# ============================================================================
+# Test Class: Basic Transpose Tests
+# ============================================================================
+
+
+class TestTranspose:
+    """Basic transpose tests with all instrumentation modes."""
+
+    REWRITE_ARGS = [
+        "-e",
+        "-v",
+        "2",
+        "--print-instructions",
+        "-E",
+        "uniform_int_distribution",
+    ]
+
+    RUNTIME_ARGS = [
+        "-e",
+        "-v",
+        "1",
+        "--label",
+        "file",
+        "line",
+        "return",
+        "args",
+        "-E",
+        "uniform_int_distribution",
+    ]
+
+    def test_baseline(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test("baseline", target="transpose", env=transpose_env, timeout=120)
+        assert_regex(result)
+
+    @pytest.mark.rocpd("transpose_env")
+    def test_sampling(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        transpose_rules: list[Path],
+        assert_rocpd,
+        assert_perfetto,
+        assert_regex,
+    ):
+        result = run_test("sampling", target="transpose", env=transpose_env, timeout=120)
+        if not result.output_dir.exists():
+            pytest.fail(f"Output directory not created")
+
+        assert_regex(result)
+        assert_perfetto(
+            result,
+            subtest_name="Perfetto HIP API Call Validation",
+            categories=["hip_runtime_api"],
+        )
+        assert_rocpd(result, rules_files=transpose_rules)
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_perfetto,
+        assert_regex,
+    ):
+        result = run_test(
+            "binary_rewrite",
+            target="transpose",
+            rewrite_args=self.REWRITE_ARGS,
+            env=transpose_env,
+            timeout=120,
+        )
+
+        assert_regex(result)
+        assert_perfetto(result)
+
+    def test_runtime_instrument(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_perfetto,
+        assert_regex,
+    ):
+        result = run_test(
+            "runtime_instrument",
+            target="transpose",
+            instrument_args=self.RUNTIME_ARGS,
+            env=transpose_env,
+            timeout=480,
+        )
+        assert_regex(result)
+        assert_perfetto(result)
+
+    def test_sys_run(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "sys_run",
+            target="transpose",
+            env=transpose_env,
+            timeout=300,
+        )
+        assert_regex(result)
+
+
+# ============================================================================
+# Test Class: Two Kernels Configuration
+# ============================================================================
+
+
+class TestTransposeTwoKernels:
+    """Test transpose with two kernels configuration (1 iteration, 2x2 size)."""
+
+    RUN_ARGS = ["1", "2", "2"]
+
+    def test_sampling(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "sampling",
+            target="transpose",
+            run_args=self.RUN_ARGS,
+            env=transpose_env,
+            timeout=120,
+        )
+        assert_regex(result)
+
+    def test_sys_run(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "sys_run",
+            target="transpose",
+            run_args=self.RUN_ARGS,
+            env=transpose_env,
+            timeout=300,
+        )
+        assert_regex(result)
+
+
+# ============================================================================
+# Test Class: Loop Instrumentation
+# ============================================================================
+
+
+@pytest.mark.loops
+class TestTransposeLoops:
+    """Test transpose with loop instrumentation."""
+
+    REWRITE_ARGS = [
+        "-e",
+        "-v",
+        "2",
+        "--label",
+        "return",
+        "args",
+        "-l",
+        "-i",
+        "8",
+        "-E",
+        "uniform_int_distribution",
+    ]
+
+    RUN_ARGS = ["2", "100", "50"]
+
+    def test_sampling(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "sampling",
+            target="transpose",
+            run_args=self.RUN_ARGS,
+            env=transpose_env,
+            timeout=120,
+        )
+        assert_regex(result)
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "binary_rewrite",
+            target="transpose",
+            rewrite_args=self.REWRITE_ARGS,
+            run_args=self.RUN_ARGS,
+            env=transpose_env,
+            timeout=120,
+        )
+        assert_regex(result, fail_regex=["0 instrumented loops in procedure transpose"])
+
+
+# ============================================================================
+# Test Class: ROCProfiler Counter Collection
+# ============================================================================
+
+
+@pytest.mark.rocprofiler
+class TestTransposeROCProfiler:
+    """Test transpose with ROCProfiler counter collection."""
+
+    REWRITE_ARGS = [
+        "-e",
+        "-v",
+        "2",
+        "-E",
+        "uniform_int_distribution",
+    ]
+
+    def test_sampling(
+        self,
+        run_test,
+        rocprofiler_env: dict[str, str],
+        gpu_info: GPUInfo,
+        assert_perfetto,
+        assert_regex,
+        assert_file_exists,
+    ):
+        result = run_test(
+            "sampling",
+            target="transpose",
+            env=rocprofiler_env,
+            timeout=120,
+        )
+
+        assert_regex(result)
+        counter_files = [result.output_dir / f for f in gpu_info.expected_counter_files]
+        assert_file_exists(
+            counter_files, subtest_name="ROCProfiler counter files existence validation"
+        )
+        assert_perfetto(
+            result,
+            subtest_name="Perfetto counter validation",
+            counter_names=gpu_info.counter_names,
+        )
+
+    def test_binary_rewrite(
+        self,
+        run_test,
+        rocprofiler_env: dict[str, str],
+        gpu_info: GPUInfo,
+        assert_file_exists,
+        assert_regex,
+    ):
+        result = run_test(
+            "binary_rewrite",
+            target="transpose",
+            rewrite_args=self.REWRITE_ARGS,
+            env=rocprofiler_env,
+            timeout=120,
+        )
+
+        assert_regex(result)
+        counter_files = [result.output_dir / f for f in gpu_info.expected_counter_files]
+        assert_file_exists(
+            counter_files, subtest_name="ROCProfiler counter files existence validation"
+        )
+
+
+# ============================================================================
+# Parametrized Tests
+# ============================================================================
+
+
+class TestTransposeParametrized:
+    """Parametrized tests for various transpose configurations."""
+
+    @pytest.mark.parametrize(
+        "iterations,tile_dim,block_rows",
+        [
+            (1, 16, 16),
+            (2, 32, 32),
+            (5, 64, 64),
+        ],
+        ids=["small", "medium", "large"],
+    )
+    def test_transpose_configurations(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        iterations: int,
+        tile_dim: int,
+        block_rows: int,
+        assert_regex,
+    ):
+        """Test transpose with different iteration and tile configurations."""
+        result = run_test(
+            "sampling",
+            target="transpose",
+            run_args=[str(iterations), str(tile_dim), str(block_rows)],
+            env=transpose_env,
+            timeout=120,
+            fail_message=f"Config ({iterations}, {tile_dim}, {block_rows}) failed",
+        )
+        assert_regex(result)
+
+    @pytest.mark.parametrize(
+        "runner_type,runner_kwargs",
+        [
+            ("sampling", {}),
+            ("sys_run", {}),
+        ],
+        ids=["sampling", "sys-run"],
+    )
+    def test_instrumentation_modes(
+        self,
+        run_test,
+        transpose_env: dict[str, str],
+        runner_type: str,
+        runner_kwargs: dict,
+        assert_regex,
+    ):
+        """Test different instrumentation modes produce valid output."""
+        result = run_test(
+            runner_type,
+            target="transpose",
+            env=transpose_env,
+            timeout=120,
+            **runner_kwargs,
+        )
+        if not result.output_dir.exists():
+            pytest.fail(f"Output directory not created")
+
+        assert_regex(result)
@@ -0,0 +1,116 @@
+# Copyright (c) Advanced Micro Devices, Inc.
+# SPDX-License-Identifier:  MIT
+
+"""
+Tests for the videodecode example.
+"""
+
+from __future__ import annotations
+import pytest
+
+pytestmark = [pytest.mark.gpu, pytest.mark.decode, pytest.mark.videodecode]
+
+from rocprofsys import (
+    GPUInfo,
+    RocprofsysConfig,
+)
+
+from pathlib import Path
+
+# =============================================================================
+# Video decode fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def video_decode_env() -> dict[str, str]:
+    """Environment variables for video decode tests."""
+    return {
+        "ROCPROFSYS_ROCM_DOMAINS": "hip_runtime_api,kernel_dispatch,memory_copy,rocdecode_api",
+        "ROCPROFSYS_AMD_SMI_METRICS": "busy,temp,power,vcn_activity,mem_usage",
+        "ROCPROFSYS_SAMPLING_CPUS": "none",
+    }
+
+
+@pytest.fixture
+def video_decode_rules(validation_rules_dir: Path) -> list[Path]:
+    """Get validation rules for video decode tests."""
+    rules_dir = validation_rules_dir / "video-decode"
+    return [
+        rules_dir / "validation-rules.json",
+        rules_dir / "sdk-metrics-rules.json",
+    ]
+
+
+# =============================================================================
+# Video decode tests
+# =============================================================================
+
+
+class TestVideoDecode:
+    """Tests for the videodecode example."""
+
+    @pytest.mark.rocpd("video_decode_env")
+    def test_sampling(
+        self,
+        run_test,
+        rocprof_config: RocprofsysConfig,
+        video_decode_env: dict[str, str],
+        gpu_info: GPUInfo,
+        video_decode_rules: list[Path],
+        assert_rocpd,
+        assert_perfetto,
+        assert_regex,
+    ):
+        env = video_decode_env.copy()
+        if "instinct" in gpu_info.categories:
+            rules_dir = rocprof_config.rocpd_validation_rules / "video-decode"
+            video_decode_rules.append(rules_dir / "amd-smi-rules.json")
+
+        result = run_test(
+            "sampling",
+            target="videodecode",
+            env=env,
+            timeout=120,
+            run_args=[
+                "-i",
+                str(rocprof_config.rocprofsys_examples_dir / "videos"),
+                "-t",
+                "1",
+            ],
+            no_check_target_arch=True,
+        )
+
+        assert_regex(result)
+        assert_perfetto(
+            result,
+            categories=["rocm_rocdecode_api"],
+            labels=["rocDecCreateVideoParser"],
+            counts=[2],
+            depths=[1],
+            counter_names=["VCN Activity"] if "instinct" in gpu_info.categories else None,
+        )
+        assert_rocpd(result, rules_files=video_decode_rules)
+
+    def test_sys_run(
+        self,
+        run_test,
+        rocprof_config: RocprofsysConfig,
+        video_decode_env: dict[str, str],
+        assert_regex,
+    ):
+        result = run_test(
+            "sys_run",
+            target="videodecode",
+            env=video_decode_env,
+            timeout=120,
+            run_args=[
+                "-i",
+                str(rocprof_config.rocprofsys_examples_dir / "videos"),
+                "-t",
+                "1",
+            ],
+            no_check_target_arch=True,
+        )
+
+        assert_regex(result)