From 6076c751a3ee647e2f398ac85b791fb0d085e110 Mon Sep 17 00:00:00 2001 From: Gopesh Bhardwaj Date: Tue, 9 Apr 2024 20:49:16 +0530 Subject: [PATCH] hsa multiqueue application (#618) * hsa multiqueuw application * cmake formatting (cmake-format) (#619) Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com> * source formatting (clang-format v11) (#620) Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com> * comppialtion fix * Update tests/bin/CMakeLists.txt Reorder `add_subdirectory` to fix (recurrent) issues with ROCTx in `CMAKE_BUILD_RPATH` * addressing early feedback * cmake updates * more cmake updates * adding queue dependency test * updating test * test updates * removed hsa_api_trace header * reformating headers to prevent clang from reordering * Fixing packaging * Fixes for hangs * source formatting (clang-format v11) (#676) Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com> * cmake formatting (cmake-format) (#673) Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com> * structure change * cmake formatting (cmake-format) (#680) Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com> * Adding kernel trace to test hang fix * rebased and fixed kernel-trace test * rhel clang fixes * source formatting (clang-format v11) (#685) Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com> * Update lib/rocprofiler-sdk-tool/helper.hpp - remove suppression of -Wshadow * Update tests/bin/hsa-queue-dependency/CMakeLists.txt - cleanup unnecessary code - GPU_LIST -> GPU_TARGETS * GPU_LIST -> GPU_TARGETS * Remove installation of test executable and libraries --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen Co-authored-by: Benjamin Welton Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen [ROCm/rocprofiler-sdk commit: 348d7403889b377c54c115d29907061eb543bcf8] --- .../.github/workflows/ci_pc_sampling.yml | 6 +- .../workflows/continuous_integration.yml | 14 +- .../.github/workflows/rerun.yml | 24 +- projects/rocprofiler-sdk/README.md | 6 +- .../rocprofiler-sdk/tests/bin/CMakeLists.txt | 17 +- .../tests/bin/hip-in-libraries/CMakeLists.txt | 5 - .../bin/hsa-queue-dependency/CMakeLists.txt | 89 +++++ .../tests/bin/hsa-queue-dependency/copy.cl | 32 ++ .../hsa-queue-dependency/multiqueue_app.cpp | 289 ++++++++++++++ .../bin/hsa-queue-dependency/multiqueue_app.h | 368 ++++++++++++++++++ .../tests/bin/multistream/CMakeLists.txt | 5 - .../bin/reproducible-runtime/CMakeLists.txt | 5 - .../tests/bin/scratch-memory/CMakeLists.txt | 5 - .../tests/bin/simple-transpose/CMakeLists.txt | 5 - .../tests/bin/transpose/CMakeLists.txt | 5 - .../bin/vector-operations/CMakeLists.txt | 5 - .../tests/common/CMakeLists.txt | 18 + .../tests/lib/transpose/CMakeLists.txt | 5 - .../lib/vector-operations/CMakeLists.txt | 5 - .../tests/rocprofv3/CMakeLists.txt | 1 + .../hsa-queue-dependency/CMakeLists.txt | 67 ++++ .../hsa-queue-dependency/conftest.py | 41 ++ .../hsa-queue-dependency/validate.py | 70 ++++ .../tests/tools/CMakeLists.txt | 5 - 24 files changed, 1009 insertions(+), 83 deletions(-) create mode 100644 projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/copy.cl create mode 100644 projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.cpp create mode 100644 projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.h create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/conftest.py create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/validate.py diff --git a/projects/rocprofiler-sdk/.github/workflows/ci_pc_sampling.yml b/projects/rocprofiler-sdk/.github/workflows/ci_pc_sampling.yml index d83f206f33..dcfc013f74 100644 --- a/projects/rocprofiler-sdk/.github/workflows/ci_pc_sampling.yml +++ b/projects/rocprofiler-sdk/.github/workflows/ci_pc_sampling.yml @@ -22,9 +22,9 @@ concurrency: cancel-in-progress: true env: - # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake + # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake ROCM_PATH: "/opt/rocm" - GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" + GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" PATH: "/usr/bin:$PATH" INCLUDED_TESTS: ".*pc_sampling.*" @@ -75,7 +75,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} ${{ matrix.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON diff --git a/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml b/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml index a43ec51495..e255a53769 100644 --- a/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml +++ b/projects/rocprofiler-sdk/.github/workflows/continuous_integration.yml @@ -17,9 +17,9 @@ concurrency: cancel-in-progress: true env: - # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake + # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake ROCM_PATH: "/opt/rocm" - GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" + GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" PATH: "/usr/bin:$PATH" EXCLUDED_TESTS: ".*pc_sampling.*" @@ -71,7 +71,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} --run-attempt ${{ github.run_attempt }} ${{ matrix.ci-flags }} -- @@ -210,7 +210,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} --coverage all --run-attempt ${{ github.run_attempt }} -- @@ -228,7 +228,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov-tests --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} --coverage tests --run-attempt ${{ github.run_attempt }} -- @@ -246,7 +246,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov-samples --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} --coverage samples --run-attempt ${{ github.run_attempt }} -- @@ -403,7 +403,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.sanitizer }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} --memcheck ${{ matrix.sanitizer }} --run-attempt ${{ github.run_attempt }} ${{ matrix.ci-flags }} diff --git a/projects/rocprofiler-sdk/.github/workflows/rerun.yml b/projects/rocprofiler-sdk/.github/workflows/rerun.yml index 0c47a0b153..93e27fc328 100644 --- a/projects/rocprofiler-sdk/.github/workflows/rerun.yml +++ b/projects/rocprofiler-sdk/.github/workflows/rerun.yml @@ -9,9 +9,9 @@ concurrency: cancel-in-progress: true env: - # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake + # TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake ROCM_PATH: "/opt/rocm" - GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" + GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" PATH: "/usr/bin:$PATH" EXCLUDED_TESTS: ".*pc_sampling.*" @@ -48,7 +48,7 @@ jobs: body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details." env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + - uses: actions/checkout@v4 - name: Install requirements @@ -78,7 +78,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} ${{ matrix.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON @@ -147,7 +147,7 @@ jobs: body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details." env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + - uses: actions/checkout@v4 - name: Install requirements @@ -177,7 +177,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} ${{ matrix.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON @@ -246,7 +246,7 @@ jobs: body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details." env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + - uses: actions/checkout@v4 - name: Install requirements @@ -276,7 +276,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} ${{ matrix.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON @@ -345,7 +345,7 @@ jobs: body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details." env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + - uses: actions/checkout@v4 - name: Install requirements @@ -375,7 +375,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} ${{ matrix.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON @@ -444,7 +444,7 @@ jobs: body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details." env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + - uses: actions/checkout@v4 - name: Install requirements @@ -474,7 +474,7 @@ jobs: --name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }} --build-jobs 16 --site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p') - --gpu-targets ${{ env.GPU_LIST }} + --gpu-targets ${{ env.GPU_TARGETS }} ${{ matrix.ci-flags }} -- -DROCPROFILER_DEP_ROCMCORE=ON diff --git a/projects/rocprofiler-sdk/README.md b/projects/rocprofiler-sdk/README.md index da8c962a08..0ed83d95e9 100644 --- a/projects/rocprofiler-sdk/README.md +++ b/projects/rocprofiler-sdk/README.md @@ -1,7 +1,7 @@ # ROCprofiler-SDK: Application Profiling, Tracing, and Performance Analysis *** -Note: rocprofiler-sdk is currently `not` supported as part of public ROCm software stack and only distributed as a beta +Note: rocprofiler-sdk is currently `not` supported as part of public ROCm software stack and only distributed as a beta release to customers. *** @@ -38,7 +38,7 @@ It can be set by the user in different location if needed. ## Build and Installation ```bash -git clone https://git@github.com:ROCm/rocprofiler-sdk-internal.git rocprofiler-sdk-source +git clone https://git@github.com:ROCm/rocprofiler-sdk-internal.git rocprofiler-sdk-source cmake \ -B rocprofiler-sdk-build \ -D ROCPROFILER_BUILD_TESTS=ON \ @@ -47,7 +47,7 @@ cmake \ -D CMAKE_INSTALL_PREFIX=/opt/rocm \ rocprofiler-sdk-source -cmake --build rocprofiler-sdk-build --target all --parallel 8 +cmake --build rocprofiler-sdk-build --target all --parallel 8 ``` To install ROCprofiler, run: diff --git a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt index 1122f319c1..daef293032 100644 --- a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt @@ -5,6 +5,14 @@ cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) project(rocprofiler-tests-bin LANGUAGES C CXX) +set(CMAKE_BUILD_RPATH + "\$ORIGIN:\$ORIGIN/../lib:$" + ) + +# applications used by integration tests which DO link to rocprofiler-sdk-roctx +add_subdirectory(reproducible-runtime) +add_subdirectory(transpose) + set(CMAKE_BUILD_RPATH "\$ORIGIN:\$ORIGIN/../lib") # applications used by integration tests which DO NOT link to rocprofiler-sdk-roctx @@ -13,11 +21,4 @@ add_subdirectory(multistream) add_subdirectory(vector-operations) add_subdirectory(hip-in-libraries) add_subdirectory(scratch-memory) - -set(CMAKE_BUILD_RPATH - "\$ORIGIN:\$ORIGIN/../lib:$" - ) - -# applications used by integration tests which DO link to rocprofiler-sdk-roctx -add_subdirectory(reproducible-runtime) -add_subdirectory(transpose) +add_subdirectory(hsa-queue-dependency) diff --git a/projects/rocprofiler-sdk/tests/bin/hip-in-libraries/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/hip-in-libraries/CMakeLists.txt index b01f354467..9695697ab9 100644 --- a/projects/rocprofiler-sdk/tests/bin/hip-in-libraries/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/hip-in-libraries/CMakeLists.txt @@ -27,8 +27,3 @@ if(TRANSPOSE_USE_MPI) target_compile_definitions(hip-in-libraries PRIVATE USE_MPI) target_link_libraries(hip-in-libraries PRIVATE MPI::MPI_C) endif() - -install( - TARGETS hip-in-libraries - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/CMakeLists.txt new file mode 100644 index 0000000000..b71456cafa --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/CMakeLists.txt @@ -0,0 +1,89 @@ +# +# +# HSA multi-queue dependency test + +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project(rocprofiler-tests-bin-hsa-multiqueue-dependency LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_program( + amdclangpp_EXECUTABLE REQUIRED + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + +function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE) + separate_arguments( + CLANG_ARG_LIST + UNIX_COMMAND + "-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1 + -target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}") + add_custom_command( + OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} + COMMAND ${amdclangpp_EXECUTABLE} ${CLANG_ARG_LIST} + COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} + ${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE} + OUTPUT ${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE} + COMMAND + ${CMAKE_COMMAND} -E copy + ${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE} + ${CMAKE_BINARY_DIR}/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE} + COMMENT "Building ${OUTPUT_FILE}...") + set(HSACO_TARGET_LIST + ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} + PARENT_SCOPE) +endfunction(generate_hsaco) + +foreach(target_id ${GPU_TARGETS}) + # generate kernel bitcodes + generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/copy.cl + ${target_id}_copy.hsaco) +endforeach(target_id) + +add_custom_target(generate_hsaco_targets DEPENDS ${HSACO_TARGET_LIST}) + +add_executable(multiqueue_testapp) +target_sources(multiqueue_testapp PRIVATE multiqueue_app.cpp) +target_compile_options(multiqueue_testapp PRIVATE -W -Wall -Wextra -Wshadow -Werror) + +find_package(Threads REQUIRED) +target_link_libraries(multiqueue_testapp PRIVATE stdc++fs Threads::Threads) + +find_package( + amd_comgr + REQUIRED + CONFIG + HINTS + ${CMAKE_INSTALL_PREFIX} + PATHS + ${ROCM_PATH} + PATH_SUFFIXES + lib/cmake/amd_comgr) + +target_link_libraries(multiqueue_testapp PRIVATE amd_comgr) + +find_package(rocprofiler-sdk REQUIRED) +target_link_libraries(multiqueue_testapp PRIVATE rocprofiler::rocprofiler + rocprofiler::tests-common-library) + +target_compile_definitions(multiqueue_testapp PUBLIC AMD_INTERNAL_BUILD=1) + +find_package( + hsa-runtime64 + REQUIRED + CONFIG + HINTS + ${rocm_version_DIR} + ${ROCM_PATH} + PATHS + ${rocm_version_DIR} + ${ROCM_PATH}) + +target_link_libraries(multiqueue_testapp PRIVATE hsa-runtime64::hsa-runtime64) + +add_dependencies(multiqueue_testapp generate_hsaco_targets) diff --git a/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/copy.cl b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/copy.cl new file mode 100644 index 0000000000..eadc65f1c9 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/copy.cl @@ -0,0 +1,32 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +__kernel void copyA(__global unsigned int* a, __global unsigned int* b) { + uint tid = get_global_id(0); + a[tid] = b[tid]; +} +__kernel void copyB(__global unsigned int* a, __global unsigned int* b) { + uint tid = get_global_id(0); + a[tid] = b[tid]; +} +__kernel void copyC(__global unsigned int* a, __global unsigned int* b) { + uint tid = get_global_id(0); + a[tid] = b[tid]; +} diff --git a/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.cpp b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.cpp new file mode 100644 index 0000000000..d4309bff1e --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.cpp @@ -0,0 +1,289 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/** ROC Profiler Multi Queue Dependency Test + * + * The goal of this test is to ensure ROC profiler does not go to deadlock + * when multiple queue are created and they are dependent on each other + * + */ + +#include "multiqueue_app.h" + +std::vector Device::all_devices; +std::vector MQDependencyTest::cpu; +std::vector MQDependencyTest::gpu; +Device::Memory MQDependencyTest::kernarg; + +int +main() +{ + hsa_status_t status; + MQDependencyTest obj; + + // Get Agent info + obj.device_discovery(); + + char agent_name[64]; + status = hsa_agent_get_info(obj.gpu[0].agent, HSA_AGENT_INFO_NAME, agent_name); + RET_IF_HSA_ERR(status) + + // Getting hasco Path + std::string hasco_file_path = std::string(agent_name) + std::string("_copy.hsaco"); + obj.search_hasco(fs::current_path(), hasco_file_path); + + MQDependencyTest::CodeObject code_object; + if(!obj.load_code_object(hasco_file_path, obj.gpu[0].agent, code_object)) + { + printf("Kernel file not found or not usable with given agent.\n"); + abort(); + } + + MQDependencyTest::Kernel copyA; + if(!obj.get_kernel(code_object, "copyA", obj.gpu[0].agent, copyA)) + { + printf("Test kernel A not found.\n"); + abort(); + } + MQDependencyTest::Kernel copyB; + if(!obj.get_kernel(code_object, "copyB", obj.gpu[0].agent, copyB)) + { + printf("Test kernel B not found.\n"); + abort(); + } + MQDependencyTest::Kernel copyC; + if(!obj.get_kernel(code_object, "copyC", obj.gpu[0].agent, copyC)) + { + printf("Test kernel C not found.\n"); + abort(); + } + + struct args_t + { + uint32_t* a; + uint32_t* b; + MQDependencyTest::OCLHiddenArgs hidden; + }; + + args_t* args; + args = static_cast(obj.hsa_malloc(sizeof(args_t), obj.kernarg)); + memset(args, 0, sizeof(args_t)); + + uint32_t* a = static_cast(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg)); + uint32_t* b = static_cast(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg)); + + memset(a, 0, 64 * sizeof(uint32_t)); + memset(b, 1, 64 * sizeof(uint32_t)); + + // Create queue in gpu agent and prepare a kernel dispatch packet + hsa_queue_t* queue1; + status = hsa_queue_create( + obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue1); + RET_IF_HSA_ERR(status) + + // Create a signal with a value of 1 and attach it to the first kernel + // dispatch packet + hsa_signal_t completion_signal_1; + status = hsa_signal_create(1, 0, NULL, &completion_signal_1); + RET_IF_HSA_ERR(status) + + // First dispath packet on queue 1, Kernel A + { + MQDependencyTest::Aql packet{}; + packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH; + packet.header.barrier = 1; + packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM; + packet.header.release = HSA_FENCE_SCOPE_SYSTEM; + + packet.dispatch.setup = 1; + packet.dispatch.workgroup_size_x = 64; + packet.dispatch.workgroup_size_y = 1; + packet.dispatch.workgroup_size_z = 1; + packet.dispatch.grid_size_x = 64; + packet.dispatch.grid_size_y = 1; + packet.dispatch.grid_size_z = 1; + + packet.dispatch.group_segment_size = copyA.group; + packet.dispatch.private_segment_size = copyA.scratch; + packet.dispatch.kernel_object = copyA.handle; + + packet.dispatch.kernarg_address = args; + packet.dispatch.completion_signal = completion_signal_1; + + args->a = a; + args->b = b; + // Tell packet processor of A to launch the first kernel dispatch packet + obj.submit_packet(queue1, packet); + } + + // Create a signal with a value of 1 and attach it to the second kernel + // dispatch packet + hsa_signal_t completion_signal_2; + status = hsa_signal_create(1, 0, NULL, &completion_signal_2); + RET_IF_HSA_ERR(status) + + hsa_signal_t completion_signal_3; + status = hsa_signal_create(1, 0, NULL, &completion_signal_3); + RET_IF_HSA_ERR(status) + + // Create barrier-AND packet that is enqueued in queue 1 + { + MQDependencyTest::Aql packet{}; + packet.header.type = HSA_PACKET_TYPE_BARRIER_AND; + packet.header.barrier = 1; + packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM; + packet.header.release = HSA_FENCE_SCOPE_SYSTEM; + + packet.barrier_and.dep_signal[0] = completion_signal_2; + obj.submit_packet(queue1, packet); + } + + // Second dispath packet on queue 1, Kernel C + { + MQDependencyTest::Aql packet{}; + packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH; + packet.header.barrier = 1; + packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM; + packet.header.release = HSA_FENCE_SCOPE_SYSTEM; + + packet.dispatch.setup = 1; + packet.dispatch.workgroup_size_x = 64; + packet.dispatch.workgroup_size_y = 1; + packet.dispatch.workgroup_size_z = 1; + packet.dispatch.grid_size_x = 64; + packet.dispatch.grid_size_y = 1; + packet.dispatch.grid_size_z = 1; + + packet.dispatch.group_segment_size = copyC.group; + packet.dispatch.private_segment_size = copyC.scratch; + packet.dispatch.kernel_object = copyC.handle; + packet.dispatch.completion_signal = completion_signal_3; + packet.dispatch.kernarg_address = args; + + args->a = a; + args->b = b; + // Tell packet processor to launch the second kernel dispatch packet + obj.submit_packet(queue1, packet); + } + + // Create queue 2 + hsa_queue_t* queue2; + status = hsa_queue_create( + obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue2); + RET_IF_HSA_ERR(status) + + // Create barrier-AND packet that is enqueued in queue 2 + { + MQDependencyTest::Aql packet{}; + packet.header.type = HSA_PACKET_TYPE_BARRIER_AND; + packet.header.barrier = 1; + packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM; + packet.header.release = HSA_FENCE_SCOPE_SYSTEM; + + packet.barrier_and.dep_signal[0] = completion_signal_1; + obj.submit_packet(queue2, packet); + } + + // Third dispath packet on queue 2, Kernel B + { + MQDependencyTest::Aql packet{}; + packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH; + packet.header.barrier = 1; + packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM; + packet.header.release = HSA_FENCE_SCOPE_SYSTEM; + + packet.dispatch.setup = 1; + packet.dispatch.workgroup_size_x = 64; + packet.dispatch.workgroup_size_y = 1; + packet.dispatch.workgroup_size_z = 1; + packet.dispatch.grid_size_x = 64; + packet.dispatch.grid_size_y = 1; + packet.dispatch.grid_size_z = 1; + + packet.dispatch.group_segment_size = copyB.group; + packet.dispatch.private_segment_size = copyB.scratch; + packet.dispatch.kernel_object = copyB.handle; + + packet.dispatch.kernarg_address = args; + packet.dispatch.completion_signal = completion_signal_2; + + args->a = a; + args->b = b; + // Tell packet processor to launch the third kernel dispatch packet + obj.submit_packet(queue2, packet); + } + + // Wait on the completion signal + hsa_signal_wait_relaxed( + completion_signal_1, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Wait on the completion signal + hsa_signal_wait_relaxed( + completion_signal_2, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + // Wait on the completion signal + hsa_signal_wait_relaxed( + completion_signal_3, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + + for(int i = 0; i < 64; i++) + { + if(a[i] != b[i]) + { + printf("error at %d: expected %d, got %d\n", i, b[i], a[i]); + abort(); + } + } + + // Clearing data structures and memory + status = hsa_signal_destroy(completion_signal_1); + RET_IF_HSA_ERR(status) + + status = hsa_signal_destroy(completion_signal_2); + RET_IF_HSA_ERR(status) + + status = hsa_signal_destroy(completion_signal_3); + RET_IF_HSA_ERR(status) + + if(queue1 != nullptr) + { + status = hsa_queue_destroy(queue1); + RET_IF_HSA_ERR(status) + } + + if(queue2 != nullptr) + { + status = hsa_queue_destroy(queue2); + RET_IF_HSA_ERR(status) + } + + status = hsa_memory_free(a); + RET_IF_HSA_ERR(status) + status = hsa_memory_free(b); + RET_IF_HSA_ERR(status) + + status = hsa_executable_destroy(code_object.executable); + RET_IF_HSA_ERR(status) + + status = hsa_code_object_reader_destroy(code_object.code_obj_rdr); + RET_IF_HSA_ERR(status) + close(code_object.file); +} diff --git a/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.h b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.h new file mode 100644 index 0000000000..ff5776f393 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/hsa-queue-dependency/multiqueue_app.h @@ -0,0 +1,368 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "common/filesystem.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = common::fs; + +#define RET_IF_HSA_ERR(err) \ + { \ + if((err) != HSA_STATUS_SUCCESS) \ + { \ + char err_val[12]; \ + char* err_str = nullptr; \ + if(hsa_status_string(err, (const char**) &err_str) != HSA_STATUS_SUCCESS) \ + { \ + sprintf(&(err_val[0]), "%#x", (uint32_t) err); \ + err_str = &(err_val[0]); \ + } \ + printf("hsa api call failure at: %s:%d\n", __FILE__, __LINE__); \ + printf("Call returned %s\n", err_str); \ + abort(); \ + } \ + } + +struct Device +{ + struct Memory + { + hsa_amd_memory_pool_t pool; + bool fine; + bool kernarg; + size_t size; + size_t granule; + }; + + hsa_agent_t agent; + char name[64]; + std::vector pools; + uint32_t fine; + uint32_t coarse; + static std::vector all_devices; +}; + +class MQDependencyTest +{ +public: + MQDependencyTest() { hsa_init(); } + ~MQDependencyTest() { hsa_shut_down(); } + + static std::vector cpu; + static std::vector gpu; + static Device::Memory kernarg; + + struct CodeObject + { + hsa_file_t file; + hsa_code_object_reader_t code_obj_rdr; + hsa_executable_t executable; + }; + + struct Kernel + { + uint64_t handle; + uint32_t scratch; + uint32_t group; + uint32_t kernarg_size; + uint32_t kernarg_align; + }; + + union AqlHeader + { + struct + { + uint16_t type : 8; + uint16_t barrier : 1; + uint16_t acquire : 2; + uint16_t release : 2; + uint16_t reserved : 3; + }; + uint16_t raw; + }; + + struct BarrierValue + { + AqlHeader header; + uint8_t AmdFormat; + uint8_t reserved; + uint32_t reserved1; + hsa_signal_t signal; + hsa_signal_value_t value; + hsa_signal_value_t mask; + uint32_t cond; + uint32_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + hsa_signal_t completion_signal; + }; + + union Aql + { + AqlHeader header; + hsa_kernel_dispatch_packet_t dispatch; + hsa_barrier_and_packet_t barrier_and; + hsa_barrier_or_packet_t barrier_or; + BarrierValue barrier_value; + }; + + struct OCLHiddenArgs + { + uint64_t offset_x; + uint64_t offset_y; + uint64_t offset_z; + void* printf_buffer; + void* enqueue; + void* enqueue2; + void* multi_grid; + }; + + bool load_code_object(std::string filename, hsa_agent_t agent, CodeObject& code_object) + { + hsa_status_t err; + code_object.file = open(filename.c_str(), O_RDONLY); + if(code_object.file == -1) + { + fprintf(stderr, "%s:%s\n", "Could not load code object", filename.c_str()); + abort(); + return false; + } + + err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr); + RET_IF_HSA_ERR(err); + + err = hsa_executable_create_alt(HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + nullptr, + &code_object.executable); + RET_IF_HSA_ERR(err); + + err = hsa_executable_load_agent_code_object( + code_object.executable, agent, code_object.code_obj_rdr, nullptr, nullptr); + if(err != HSA_STATUS_SUCCESS) return false; + + err = hsa_executable_freeze(code_object.executable, nullptr); + RET_IF_HSA_ERR(err); + + return true; + } + + bool get_kernel(const CodeObject& code_object, + std::string kernel, + hsa_agent_t agent, + Kernel& kern) + { + hsa_executable_symbol_t symbol; + hsa_status_t err = hsa_executable_get_symbol_by_name( + code_object.executable, kernel.c_str(), &agent, &symbol); + if(err != HSA_STATUS_SUCCESS) + { + err = hsa_executable_get_symbol_by_name( + code_object.executable, (kernel + ".kd").c_str(), &agent, &symbol); + if(err != HSA_STATUS_SUCCESS) + { + return false; + } + } + printf("\nkernel-name: %s\n", kernel.c_str()); + err = hsa_executable_symbol_get_info( + symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle); + RET_IF_HSA_ERR(err); + + return true; + } + + // Not for parallel insertion. + bool submit_packet(hsa_queue_t* queue, Aql& pkt) + { + size_t mask = queue->size - 1; + Aql* ring = static_cast(queue->base_address); + + uint64_t write = hsa_queue_load_write_index_relaxed(queue); + uint64_t read = hsa_queue_load_read_index_relaxed(queue); + if(write - read + 1 > queue->size) return false; + + Aql& dst = ring[write & mask]; + + uint16_t header = pkt.header.raw; + pkt.header.raw = dst.header.raw; + dst = pkt; + __atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE); + pkt.header.raw = header; + + hsa_queue_store_write_index_release(queue, write + 1); + hsa_signal_store_screlease(queue->doorbell_signal, write); + + return true; + } + + void* hsa_malloc(size_t size, const Device::Memory& mem) + { + void* ret; + hsa_status_t err = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &ret); + RET_IF_HSA_ERR(err); + + err = hsa_amd_agents_allow_access( + Device::all_devices.size(), &Device::all_devices[0], nullptr, ret); + RET_IF_HSA_ERR(err); + return ret; + } + + void* hsa_malloc(size_t size, const Device& dev, bool fine) + { + uint32_t index = fine ? dev.fine : dev.coarse; + assert(index != -1u && "Memory type unavailable."); + return hsa_malloc(size, dev.pools[index]); + } + + bool device_discovery() + { + hsa_status_t err; + + err = hsa_iterate_agents( + [](hsa_agent_t agent, void*) { + hsa_status_t error; + + Device dev; + dev.agent = agent; + + dev.fine = -1u; + dev.coarse = -1u; + + error = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, dev.name); + RET_IF_HSA_ERR(error) + + hsa_device_type_t type; + error = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + RET_IF_HSA_ERR(error) + + error = hsa_amd_agent_iterate_memory_pools( + agent, + [](hsa_amd_memory_pool_t pool, void* data) { + std::vector& pools = + *reinterpret_cast*>(data); + hsa_status_t status; + + hsa_amd_segment_t segment; + status = hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); + RET_IF_HSA_ERR(status) + + if(segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS; + + uint32_t flags; + status = hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags); + RET_IF_HSA_ERR(status) + + Device::Memory mem; + mem.pool = pool; + mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED); + mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT); + + status = hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size); + RET_IF_HSA_ERR(status) + + status = hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule); + RET_IF_HSA_ERR(status) + + pools.push_back(mem); + return HSA_STATUS_SUCCESS; + }, + static_cast(&dev.pools)); + + if(!dev.pools.empty()) + { + for(size_t i = 0; i < dev.pools.size(); i++) + { + if(dev.pools[i].fine && dev.pools[i].kernarg && dev.fine == -1u) + dev.fine = i; + if(dev.pools[i].fine && !dev.pools[i].kernarg) dev.fine = i; + if(!dev.pools[i].fine) dev.coarse = i; + } + + if(type == HSA_DEVICE_TYPE_CPU) + cpu.push_back(dev); + else + gpu.push_back(dev); + + Device::all_devices.push_back(dev.agent); + } + + return HSA_STATUS_SUCCESS; + }, + nullptr); + + []() { + for(auto& dev : cpu) + { + for(auto& mem : dev.pools) + { + if(mem.fine && mem.kernarg) + { + kernarg = mem; + return; + } + } + } + }(); + RET_IF_HSA_ERR(err); + + if(cpu.empty() || gpu.empty() || kernarg.pool.handle == 0) return false; + return true; + } + + void search_hasco(const fs::path& directory, std::string& filename) + { + for(const auto& entry : fs::directory_iterator(directory)) + { + if(fs::is_regular_file(entry)) + { + if(entry.path().filename() == filename) + { + filename = entry.path(); + } + } + else if(fs::is_directory(entry)) + { + search_hasco(entry, filename); // Recursive call for subdirectories + } + } + } +}; diff --git a/projects/rocprofiler-sdk/tests/bin/multistream/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/multistream/CMakeLists.txt index 0ced0f632e..147694b85c 100644 --- a/projects/rocprofiler-sdk/tests/bin/multistream/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/multistream/CMakeLists.txt @@ -39,8 +39,3 @@ target_compile_options(multistream PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow find_package(Threads REQUIRED) target_link_libraries(multistream PRIVATE Threads::Threads) - -install( - TARGETS multistream - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/reproducible-runtime/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/reproducible-runtime/CMakeLists.txt index ea66c56c1f..5302eeb194 100644 --- a/projects/rocprofiler-sdk/tests/bin/reproducible-runtime/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/reproducible-runtime/CMakeLists.txt @@ -52,8 +52,3 @@ if(REPRODUCIBLE_RUNTIME_USE_MPI) target_compile_definitions(reproducible-runtime PRIVATE USE_MPI) target_link_libraries(reproducible-runtime PRIVATE MPI::MPI_C) endif() - -install( - TARGETS reproducible-runtime - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/scratch-memory/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/scratch-memory/CMakeLists.txt index 1fddba6c9d..0997ead8e6 100644 --- a/projects/rocprofiler-sdk/tests/bin/scratch-memory/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/scratch-memory/CMakeLists.txt @@ -40,8 +40,3 @@ target_compile_options(scratch-memory PRIVATE -W -Wall -Wextra -Wpedantic -Wshad find_package(Threads REQUIRED) target_link_libraries(scratch-memory PRIVATE Threads::Threads hsa-runtime64) - -install( - TARGETS scratch-memory - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/simple-transpose/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/simple-transpose/CMakeLists.txt index 7d2b3240d7..6b065f6abb 100644 --- a/projects/rocprofiler-sdk/tests/bin/simple-transpose/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/simple-transpose/CMakeLists.txt @@ -44,8 +44,3 @@ target_link_libraries(simple-transpose PRIVATE Threads::Threads) find_package(rocprofiler-sdk-roctx REQUIRED) target_link_libraries(simple-transpose PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx) - -install( - TARGETS simple-transpose - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/transpose/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/transpose/CMakeLists.txt index e9cebfeba5..f2bc686688 100644 --- a/projects/rocprofiler-sdk/tests/bin/transpose/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/transpose/CMakeLists.txt @@ -50,8 +50,3 @@ if(TRANSPOSE_USE_MPI) target_compile_definitions(transpose PRIVATE USE_MPI) target_link_libraries(transpose PRIVATE MPI::MPI_C) endif() - -install( - TARGETS transpose - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/vector-operations/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/vector-operations/CMakeLists.txt index 43c99ecc4d..dc508eae99 100644 --- a/projects/rocprofiler-sdk/tests/bin/vector-operations/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/vector-operations/CMakeLists.txt @@ -39,8 +39,3 @@ target_compile_options(vector-ops PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow - find_package(Threads REQUIRED) target_link_libraries(vector-ops PRIVATE Threads::Threads) - -install( - TARGETS vector-ops - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt index 36750706b9..fd772ba7f2 100644 --- a/projects/rocprofiler-sdk/tests/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/common/CMakeLists.txt @@ -11,6 +11,24 @@ set(ROCPROFILER_DEFAULT_FAIL_REGEX "threw an exception|Permission denied|Could not create logging file" CACHE STRING "Default FAIL_REGULAR_EXPRESSION for tests") +set(DEFAULT_GPU_TARGETS + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx940" + "gfx941" + "gfx942" + "gfx1030" + "gfx1010" + "gfx1100" + "gfx1101" + "gfx1102") + +set(GPU_TARGETS + "${DEFAULT_GPU_TARGETS}" + CACHE STRING "GPU targets to compile for") + # build flags add_library(rocprofiler-tests-build-flags INTERFACE) add_library(rocprofiler::tests-build-flags ALIAS rocprofiler-tests-build-flags) diff --git a/projects/rocprofiler-sdk/tests/lib/transpose/CMakeLists.txt b/projects/rocprofiler-sdk/tests/lib/transpose/CMakeLists.txt index ae69cae770..afa4baf173 100644 --- a/projects/rocprofiler-sdk/tests/lib/transpose/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/lib/transpose/CMakeLists.txt @@ -54,8 +54,3 @@ if(TRANSPOSE_USE_MPI) target_compile_definitions(transpose-shared-library PRIVATE USE_MPI) target_link_libraries(transpose-shared-library PRIVATE MPI::MPI_C) endif() - -install( - TARGETS transpose-shared-library - DESTINATION lib - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/lib/vector-operations/CMakeLists.txt b/projects/rocprofiler-sdk/tests/lib/vector-operations/CMakeLists.txt index e550d8dd76..f9ebb9724c 100644 --- a/projects/rocprofiler-sdk/tests/lib/vector-operations/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/lib/vector-operations/CMakeLists.txt @@ -42,8 +42,3 @@ set_target_properties(vector-ops-shared-library PROPERTIES OUTPUT_NAME vector-op find_package(Threads REQUIRED) target_link_libraries(vector-ops-shared-library PRIVATE Threads::Threads) - -install( - TARGETS vector-ops-shared-library - DESTINATION lib - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt index de533f4c17..dd3dc40427 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt @@ -27,3 +27,4 @@ add_subdirectory(tracing) add_subdirectory(tracing-plus-cc) add_subdirectory(tracing-hip-in-libraries) add_subdirectory(counter-collection) +add_subdirectory(hsa-queue-dependency) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/CMakeLists.txt new file mode 100644 index 0000000000..833868ec86 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/CMakeLists.txt @@ -0,0 +1,67 @@ +# +# rocprofv3 tool test +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocprofv3-hsa-queue-tracing + LANGUAGES CXX + VERSION 0.0.0) + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer") + set(LOG_LEVEL "warning") # info produces memory leak +else() + set(LOG_LEVEL "info") +endif() + +set(tracing-env + "${PRELOAD_ENV}" "ROCPROF_LOG_LEVEL=${LOG_LEVEL}" + "ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}" + "HSA_TOOLS_LIB=$") + +foreach(FILENAME validate.py conftest.py) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME} + ${CMAKE_CURRENT_BINARY_DIR}/${FILENAME} COPYONLY) +endforeach() + +find_package(rocprofiler-sdk REQUIRED) + +# hsa multiqueue dependency test +add_test( + NAME rocprofv3-test-hsa-multiqueue-execute + COMMAND + $ --hsa-trace --kernel-trace -d + ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace -o out $) + +set_tests_properties( + rocprofv3-test-hsa-multiqueue-execute + PROPERTIES LABELS "integration-tests" ENVIRONMENT "${tracing-env}" + FAIL_REGULAR_EXPRESSION "HSA_API|HIP_API") + +add_test( + NAME rocprofv3-test-hsa-multiqueue-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --hsa-trace-input + ${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_hsa_api_trace.csv + --kernel-trace-input + ${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_kernel_trace.csv) + +set(MULTIQUEUE_VALIDATION_FILES + ${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_hsa_api_trace.csv + ${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_kernel_api_trace.csv) + +set_tests_properties( + rocprofv3-test-hsa-multiqueue-validate + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + DEPENDS + rocprofv3-test-hsa-multiqueue-execute + FAIL_REGULAR_EXPRESSION + "AssertionError" + ATTACHED_FILES_ON_FAIL + "${MULTIQUEUE_VALIDATION_FILES}") diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/conftest.py new file mode 100644 index 0000000000..6725dbbb32 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/conftest.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +import csv +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--hsa-trace-input", + action="store", + help="Path to HSA API tracing CSV file.", + ) + parser.addoption( + "--kernel-trace-input", + action="store", + help="Path to Kernel API tracing CSV file.", + ) + + +@pytest.fixture +def hsa_trace_input_data(request): + filename = request.config.getoption("--hsa-trace-input") + data = [] + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data + + +@pytest.fixture +def kernel_trace_input_data(request): + filename = request.config.getoption("--kernel-trace-input") + data = [] + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + + return data diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/validate.py new file mode 100644 index 0000000000..b09250fe04 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hsa-queue-dependency/validate.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import sys +import pytest + + +def test_hsa_api_trace(hsa_trace_input_data): + functions = [] + correlation_ids = [] + for row in hsa_trace_input_data: + assert row["Domain"] in ( + "HSA_CORE_API", + "HSA_AMD_EXT_API", + "HSA_IMAGE_EXT_API", + "HSA_FINALIZE_EXT_API", + ) + assert int(row["Process_Id"]) > 0 + assert int(row["Thread_Id"]) >= int(row["Process_Id"]) + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + functions.append(row["Function"]) + correlation_ids.append(int(row["Correlation_Id"])) + + correlation_ids = sorted(list(set(correlation_ids))) + + # deterministic call counts + num_queue_create_calls = 2 + num_queue_destroy_calls = 2 + num_hsa_mem_free_calls = 2 + + # signal create/destroy calls + # although the app explicitly only creates 3 signals + # but hsa_init() internally calls hsa_signal_create + num_hsa_signal_create_calls = 4 + num_hsa_signal_destroy_calls = 4 + + # all correlation ids are unique + assert len(correlation_ids) == len(hsa_trace_input_data) + + functions = list(functions) + assert "hsa_shut_down" in functions + assert functions.count("hsa_queue_create") == num_queue_create_calls + assert functions.count("hsa_queue_destroy") == num_queue_destroy_calls + assert functions.count("hsa_memory_free") == num_hsa_mem_free_calls + assert functions.count("hsa_signal_create") == num_hsa_signal_create_calls + assert functions.count("hsa_signal_destroy") == num_hsa_signal_destroy_calls + + +def test_kernel_trace(kernel_trace_input_data): + valid_kernel_names = ("copyA", "copyB", "copyC") + + assert len(kernel_trace_input_data) == 3 + for row in kernel_trace_input_data: + assert row["Kind"] == "KERNEL_DISPATCH" + assert int(row["Agent_Id"]) > 0 + assert int(row["Queue_Id"]) > 0 + assert int(row["Kernel_Id"]) > 0 + assert row["Kernel_Name"] in valid_kernel_names + assert int(row["Correlation_Id"]) > 0 + assert int(row["Workgroup_Size_X"]) == 64 + assert int(row["Workgroup_Size_Y"]) == 1 + assert int(row["Workgroup_Size_Z"]) == 1 + assert int(row["Grid_Size_X"]) == 64 + assert int(row["Grid_Size_Y"]) == 1 + assert int(row["Grid_Size_Z"]) == 1 + assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/tools/CMakeLists.txt b/projects/rocprofiler-sdk/tests/tools/CMakeLists.txt index 9ced4b6074..a61abd974e 100644 --- a/projects/rocprofiler-sdk/tests/tools/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/tools/CMakeLists.txt @@ -25,11 +25,6 @@ set_target_properties( ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..") -install( - TARGETS rocprofiler-sdk-json-tool - DESTINATION lib/rocprofiler-sdk - COMPONENT tests) - # tool library which just checks that tools can be compiled with C language add_library(rocprofiler-sdk-c-tool SHARED) target_sources(rocprofiler-sdk-c-tool PRIVATE c-tool.c)