* hsa multiqueuw application

* cmake formatting (cmake-format) (#619)

Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>

* source formatting (clang-format v11) (#620)

Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>

* comppialtion fix

* Update tests/bin/CMakeLists.txt

Reorder `add_subdirectory` to fix (recurrent) issues with ROCTx in `CMAKE_BUILD_RPATH`

* addressing early feedback

* cmake updates

* more cmake updates

* adding queue dependency test

* updating test

* test updates

* removed hsa_api_trace header

* reformating headers to prevent clang from reordering

* Fixing packaging

* Fixes for hangs

* source formatting (clang-format v11) (#676)

Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com>

* cmake formatting (cmake-format) (#673)

Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com>

* structure change

* cmake formatting (cmake-format) (#680)

Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>

* Adding kernel trace to test hang fix

* rebased and fixed kernel-trace test

* rhel clang fixes

* source formatting (clang-format v11) (#685)

Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>

* Update lib/rocprofiler-sdk-tool/helper.hpp

- remove suppression of -Wshadow

* Update tests/bin/hsa-queue-dependency/CMakeLists.txt

- cleanup unnecessary code
- GPU_LIST -> GPU_TARGETS

* GPU_LIST -> GPU_TARGETS

* Remove installation of test executable and libraries

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Co-authored-by: Benjamin Welton <bewelton@amd.com>
Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>

[ROCm/rocprofiler-sdk commit: 348d740388]
Этот коммит содержится в:
Gopesh Bhardwaj
2024-04-09 20:49:16 +05:30
коммит произвёл GitHub
родитель 73ff4f2502
Коммит 6076c751a3
24 изменённых файлов: 1009 добавлений и 83 удалений
+3 -3
Просмотреть файл
@@ -22,9 +22,9 @@ concurrency:
cancel-in-progress: true
env:
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
ROCM_PATH: "/opt/rocm"
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
PATH: "/usr/bin:$PATH"
INCLUDED_TESTS: ".*pc_sampling.*"
@@ -75,7 +75,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
${{ matrix.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
+7 -7
Просмотреть файл
@@ -17,9 +17,9 @@ concurrency:
cancel-in-progress: true
env:
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
ROCM_PATH: "/opt/rocm"
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
PATH: "/usr/bin:$PATH"
EXCLUDED_TESTS: ".*pc_sampling.*"
@@ -71,7 +71,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
--run-attempt ${{ github.run_attempt }}
${{ matrix.ci-flags }}
--
@@ -210,7 +210,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
--coverage all
--run-attempt ${{ github.run_attempt }}
--
@@ -228,7 +228,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov-tests
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
--coverage tests
--run-attempt ${{ github.run_attempt }}
--
@@ -246,7 +246,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov-samples
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
--coverage samples
--run-attempt ${{ github.run_attempt }}
--
@@ -403,7 +403,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.sanitizer }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
--memcheck ${{ matrix.sanitizer }}
--run-attempt ${{ github.run_attempt }}
${{ matrix.ci-flags }}
+12 -12
Просмотреть файл
@@ -9,9 +9,9 @@ concurrency:
cancel-in-progress: true
env:
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
ROCM_PATH: "/opt/rocm"
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
PATH: "/usr/bin:$PATH"
EXCLUDED_TESTS: ".*pc_sampling.*"
@@ -48,7 +48,7 @@ jobs:
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v4
- name: Install requirements
@@ -78,7 +78,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
${{ matrix.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
@@ -147,7 +147,7 @@ jobs:
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v4
- name: Install requirements
@@ -177,7 +177,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
${{ matrix.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
@@ -246,7 +246,7 @@ jobs:
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v4
- name: Install requirements
@@ -276,7 +276,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
${{ matrix.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
@@ -345,7 +345,7 @@ jobs:
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v4
- name: Install requirements
@@ -375,7 +375,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
${{ matrix.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
@@ -444,7 +444,7 @@ jobs:
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v4
- name: Install requirements
@@ -474,7 +474,7 @@ jobs:
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
--build-jobs 16
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
--gpu-targets ${{ env.GPU_LIST }}
--gpu-targets ${{ env.GPU_TARGETS }}
${{ matrix.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
+3 -3
Просмотреть файл
@@ -1,7 +1,7 @@
# ROCprofiler-SDK: Application Profiling, Tracing, and Performance Analysis
***
Note: rocprofiler-sdk is currently `not` supported as part of public ROCm software stack and only distributed as a beta
Note: rocprofiler-sdk is currently `not` supported as part of public ROCm software stack and only distributed as a beta
release to customers.
***
@@ -38,7 +38,7 @@ It can be set by the user in different location if needed.
## Build and Installation
```bash
git clone https://git@github.com:ROCm/rocprofiler-sdk-internal.git rocprofiler-sdk-source
git clone https://git@github.com:ROCm/rocprofiler-sdk-internal.git rocprofiler-sdk-source
cmake \
-B rocprofiler-sdk-build \
-D ROCPROFILER_BUILD_TESTS=ON \
@@ -47,7 +47,7 @@ cmake \
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
rocprofiler-sdk-source
cmake --build rocprofiler-sdk-build --target all --parallel 8
cmake --build rocprofiler-sdk-build --target all --parallel 8
```
To install ROCprofiler, run:
+9 -8
Просмотреть файл
@@ -5,6 +5,14 @@ cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(rocprofiler-tests-bin LANGUAGES C CXX)
set(CMAKE_BUILD_RPATH
"\$ORIGIN:\$ORIGIN/../lib:$<TARGET_FILE_DIR:rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library>"
)
# applications used by integration tests which DO link to rocprofiler-sdk-roctx
add_subdirectory(reproducible-runtime)
add_subdirectory(transpose)
set(CMAKE_BUILD_RPATH "\$ORIGIN:\$ORIGIN/../lib")
# applications used by integration tests which DO NOT link to rocprofiler-sdk-roctx
@@ -13,11 +21,4 @@ add_subdirectory(multistream)
add_subdirectory(vector-operations)
add_subdirectory(hip-in-libraries)
add_subdirectory(scratch-memory)
set(CMAKE_BUILD_RPATH
"\$ORIGIN:\$ORIGIN/../lib:$<TARGET_FILE_DIR:rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library>"
)
# applications used by integration tests which DO link to rocprofiler-sdk-roctx
add_subdirectory(reproducible-runtime)
add_subdirectory(transpose)
add_subdirectory(hsa-queue-dependency)
-5
Просмотреть файл
@@ -27,8 +27,3 @@ if(TRANSPOSE_USE_MPI)
target_compile_definitions(hip-in-libraries PRIVATE USE_MPI)
target_link_libraries(hip-in-libraries PRIVATE MPI::MPI_C)
endif()
install(
TARGETS hip-in-libraries
DESTINATION bin
COMPONENT tests)
+89
Просмотреть файл
@@ -0,0 +1,89 @@
#
#
# HSA multi-queue dependency test
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(rocprofiler-tests-bin-hsa-multiqueue-dependency LANGUAGES CXX)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_program(
amdclangpp_EXECUTABLE REQUIRED
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin NO_CACHE)
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
separate_arguments(
CLANG_ARG_LIST
UNIX_COMMAND
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
COMMAND ${amdclangpp_EXECUTABLE} ${CLANG_ARG_LIST}
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
OUTPUT ${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
COMMAND
${CMAKE_COMMAND} -E copy
${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
${CMAKE_BINARY_DIR}/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
COMMENT "Building ${OUTPUT_FILE}...")
set(HSACO_TARGET_LIST
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
PARENT_SCOPE)
endfunction(generate_hsaco)
foreach(target_id ${GPU_TARGETS})
# generate kernel bitcodes
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/copy.cl
${target_id}_copy.hsaco)
endforeach(target_id)
add_custom_target(generate_hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
add_executable(multiqueue_testapp)
target_sources(multiqueue_testapp PRIVATE multiqueue_app.cpp)
target_compile_options(multiqueue_testapp PRIVATE -W -Wall -Wextra -Wshadow -Werror)
find_package(Threads REQUIRED)
target_link_libraries(multiqueue_testapp PRIVATE stdc++fs Threads::Threads)
find_package(
amd_comgr
REQUIRED
CONFIG
HINTS
${CMAKE_INSTALL_PREFIX}
PATHS
${ROCM_PATH}
PATH_SUFFIXES
lib/cmake/amd_comgr)
target_link_libraries(multiqueue_testapp PRIVATE amd_comgr)
find_package(rocprofiler-sdk REQUIRED)
target_link_libraries(multiqueue_testapp PRIVATE rocprofiler::rocprofiler
rocprofiler::tests-common-library)
target_compile_definitions(multiqueue_testapp PUBLIC AMD_INTERNAL_BUILD=1)
find_package(
hsa-runtime64
REQUIRED
CONFIG
HINTS
${rocm_version_DIR}
${ROCM_PATH}
PATHS
${rocm_version_DIR}
${ROCM_PATH})
target_link_libraries(multiqueue_testapp PRIVATE hsa-runtime64::hsa-runtime64)
add_dependencies(multiqueue_testapp generate_hsaco_targets)
+32
Просмотреть файл
@@ -0,0 +1,32 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
__kernel void copyA(__global unsigned int* a, __global unsigned int* b) {
uint tid = get_global_id(0);
a[tid] = b[tid];
}
__kernel void copyB(__global unsigned int* a, __global unsigned int* b) {
uint tid = get_global_id(0);
a[tid] = b[tid];
}
__kernel void copyC(__global unsigned int* a, __global unsigned int* b) {
uint tid = get_global_id(0);
a[tid] = b[tid];
}
+289
Просмотреть файл
@@ -0,0 +1,289 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
/** ROC Profiler Multi Queue Dependency Test
*
* The goal of this test is to ensure ROC profiler does not go to deadlock
* when multiple queue are created and they are dependent on each other
*
*/
#include "multiqueue_app.h"
std::vector<hsa_agent_t> Device::all_devices;
std::vector<Device> MQDependencyTest::cpu;
std::vector<Device> MQDependencyTest::gpu;
Device::Memory MQDependencyTest::kernarg;
int
main()
{
hsa_status_t status;
MQDependencyTest obj;
// Get Agent info
obj.device_discovery();
char agent_name[64];
status = hsa_agent_get_info(obj.gpu[0].agent, HSA_AGENT_INFO_NAME, agent_name);
RET_IF_HSA_ERR(status)
// Getting hasco Path
std::string hasco_file_path = std::string(agent_name) + std::string("_copy.hsaco");
obj.search_hasco(fs::current_path(), hasco_file_path);
MQDependencyTest::CodeObject code_object;
if(!obj.load_code_object(hasco_file_path, obj.gpu[0].agent, code_object))
{
printf("Kernel file not found or not usable with given agent.\n");
abort();
}
MQDependencyTest::Kernel copyA;
if(!obj.get_kernel(code_object, "copyA", obj.gpu[0].agent, copyA))
{
printf("Test kernel A not found.\n");
abort();
}
MQDependencyTest::Kernel copyB;
if(!obj.get_kernel(code_object, "copyB", obj.gpu[0].agent, copyB))
{
printf("Test kernel B not found.\n");
abort();
}
MQDependencyTest::Kernel copyC;
if(!obj.get_kernel(code_object, "copyC", obj.gpu[0].agent, copyC))
{
printf("Test kernel C not found.\n");
abort();
}
struct args_t
{
uint32_t* a;
uint32_t* b;
MQDependencyTest::OCLHiddenArgs hidden;
};
args_t* args;
args = static_cast<args_t*>(obj.hsa_malloc(sizeof(args_t), obj.kernarg));
memset(args, 0, sizeof(args_t));
uint32_t* a = static_cast<uint32_t*>(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg));
uint32_t* b = static_cast<uint32_t*>(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg));
memset(a, 0, 64 * sizeof(uint32_t));
memset(b, 1, 64 * sizeof(uint32_t));
// Create queue in gpu agent and prepare a kernel dispatch packet
hsa_queue_t* queue1;
status = hsa_queue_create(
obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue1);
RET_IF_HSA_ERR(status)
// Create a signal with a value of 1 and attach it to the first kernel
// dispatch packet
hsa_signal_t completion_signal_1;
status = hsa_signal_create(1, 0, NULL, &completion_signal_1);
RET_IF_HSA_ERR(status)
// First dispath packet on queue 1, Kernel A
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.dispatch.setup = 1;
packet.dispatch.workgroup_size_x = 64;
packet.dispatch.workgroup_size_y = 1;
packet.dispatch.workgroup_size_z = 1;
packet.dispatch.grid_size_x = 64;
packet.dispatch.grid_size_y = 1;
packet.dispatch.grid_size_z = 1;
packet.dispatch.group_segment_size = copyA.group;
packet.dispatch.private_segment_size = copyA.scratch;
packet.dispatch.kernel_object = copyA.handle;
packet.dispatch.kernarg_address = args;
packet.dispatch.completion_signal = completion_signal_1;
args->a = a;
args->b = b;
// Tell packet processor of A to launch the first kernel dispatch packet
obj.submit_packet(queue1, packet);
}
// Create a signal with a value of 1 and attach it to the second kernel
// dispatch packet
hsa_signal_t completion_signal_2;
status = hsa_signal_create(1, 0, NULL, &completion_signal_2);
RET_IF_HSA_ERR(status)
hsa_signal_t completion_signal_3;
status = hsa_signal_create(1, 0, NULL, &completion_signal_3);
RET_IF_HSA_ERR(status)
// Create barrier-AND packet that is enqueued in queue 1
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_BARRIER_AND;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.barrier_and.dep_signal[0] = completion_signal_2;
obj.submit_packet(queue1, packet);
}
// Second dispath packet on queue 1, Kernel C
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.dispatch.setup = 1;
packet.dispatch.workgroup_size_x = 64;
packet.dispatch.workgroup_size_y = 1;
packet.dispatch.workgroup_size_z = 1;
packet.dispatch.grid_size_x = 64;
packet.dispatch.grid_size_y = 1;
packet.dispatch.grid_size_z = 1;
packet.dispatch.group_segment_size = copyC.group;
packet.dispatch.private_segment_size = copyC.scratch;
packet.dispatch.kernel_object = copyC.handle;
packet.dispatch.completion_signal = completion_signal_3;
packet.dispatch.kernarg_address = args;
args->a = a;
args->b = b;
// Tell packet processor to launch the second kernel dispatch packet
obj.submit_packet(queue1, packet);
}
// Create queue 2
hsa_queue_t* queue2;
status = hsa_queue_create(
obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue2);
RET_IF_HSA_ERR(status)
// Create barrier-AND packet that is enqueued in queue 2
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_BARRIER_AND;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.barrier_and.dep_signal[0] = completion_signal_1;
obj.submit_packet(queue2, packet);
}
// Third dispath packet on queue 2, Kernel B
{
MQDependencyTest::Aql packet{};
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
packet.header.barrier = 1;
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
packet.dispatch.setup = 1;
packet.dispatch.workgroup_size_x = 64;
packet.dispatch.workgroup_size_y = 1;
packet.dispatch.workgroup_size_z = 1;
packet.dispatch.grid_size_x = 64;
packet.dispatch.grid_size_y = 1;
packet.dispatch.grid_size_z = 1;
packet.dispatch.group_segment_size = copyB.group;
packet.dispatch.private_segment_size = copyB.scratch;
packet.dispatch.kernel_object = copyB.handle;
packet.dispatch.kernarg_address = args;
packet.dispatch.completion_signal = completion_signal_2;
args->a = a;
args->b = b;
// Tell packet processor to launch the third kernel dispatch packet
obj.submit_packet(queue2, packet);
}
// Wait on the completion signal
hsa_signal_wait_relaxed(
completion_signal_1, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
// Wait on the completion signal
hsa_signal_wait_relaxed(
completion_signal_2, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
// Wait on the completion signal
hsa_signal_wait_relaxed(
completion_signal_3, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
for(int i = 0; i < 64; i++)
{
if(a[i] != b[i])
{
printf("error at %d: expected %d, got %d\n", i, b[i], a[i]);
abort();
}
}
// Clearing data structures and memory
status = hsa_signal_destroy(completion_signal_1);
RET_IF_HSA_ERR(status)
status = hsa_signal_destroy(completion_signal_2);
RET_IF_HSA_ERR(status)
status = hsa_signal_destroy(completion_signal_3);
RET_IF_HSA_ERR(status)
if(queue1 != nullptr)
{
status = hsa_queue_destroy(queue1);
RET_IF_HSA_ERR(status)
}
if(queue2 != nullptr)
{
status = hsa_queue_destroy(queue2);
RET_IF_HSA_ERR(status)
}
status = hsa_memory_free(a);
RET_IF_HSA_ERR(status)
status = hsa_memory_free(b);
RET_IF_HSA_ERR(status)
status = hsa_executable_destroy(code_object.executable);
RET_IF_HSA_ERR(status)
status = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
RET_IF_HSA_ERR(status)
close(code_object.file);
}
+368
Просмотреть файл
@@ -0,0 +1,368 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include "common/filesystem.hpp"
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <unistd.h>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
namespace fs = common::fs;
#define RET_IF_HSA_ERR(err) \
{ \
if((err) != HSA_STATUS_SUCCESS) \
{ \
char err_val[12]; \
char* err_str = nullptr; \
if(hsa_status_string(err, (const char**) &err_str) != HSA_STATUS_SUCCESS) \
{ \
sprintf(&(err_val[0]), "%#x", (uint32_t) err); \
err_str = &(err_val[0]); \
} \
printf("hsa api call failure at: %s:%d\n", __FILE__, __LINE__); \
printf("Call returned %s\n", err_str); \
abort(); \
} \
}
struct Device
{
struct Memory
{
hsa_amd_memory_pool_t pool;
bool fine;
bool kernarg;
size_t size;
size_t granule;
};
hsa_agent_t agent;
char name[64];
std::vector<Memory> pools;
uint32_t fine;
uint32_t coarse;
static std::vector<hsa_agent_t> all_devices;
};
class MQDependencyTest
{
public:
MQDependencyTest() { hsa_init(); }
~MQDependencyTest() { hsa_shut_down(); }
static std::vector<Device> cpu;
static std::vector<Device> gpu;
static Device::Memory kernarg;
struct CodeObject
{
hsa_file_t file;
hsa_code_object_reader_t code_obj_rdr;
hsa_executable_t executable;
};
struct Kernel
{
uint64_t handle;
uint32_t scratch;
uint32_t group;
uint32_t kernarg_size;
uint32_t kernarg_align;
};
union AqlHeader
{
struct
{
uint16_t type : 8;
uint16_t barrier : 1;
uint16_t acquire : 2;
uint16_t release : 2;
uint16_t reserved : 3;
};
uint16_t raw;
};
struct BarrierValue
{
AqlHeader header;
uint8_t AmdFormat;
uint8_t reserved;
uint32_t reserved1;
hsa_signal_t signal;
hsa_signal_value_t value;
hsa_signal_value_t mask;
uint32_t cond;
uint32_t reserved2;
uint64_t reserved3;
uint64_t reserved4;
hsa_signal_t completion_signal;
};
union Aql
{
AqlHeader header;
hsa_kernel_dispatch_packet_t dispatch;
hsa_barrier_and_packet_t barrier_and;
hsa_barrier_or_packet_t barrier_or;
BarrierValue barrier_value;
};
struct OCLHiddenArgs
{
uint64_t offset_x;
uint64_t offset_y;
uint64_t offset_z;
void* printf_buffer;
void* enqueue;
void* enqueue2;
void* multi_grid;
};
bool load_code_object(std::string filename, hsa_agent_t agent, CodeObject& code_object)
{
hsa_status_t err;
code_object.file = open(filename.c_str(), O_RDONLY);
if(code_object.file == -1)
{
fprintf(stderr, "%s:%s\n", "Could not load code object", filename.c_str());
abort();
return false;
}
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
RET_IF_HSA_ERR(err);
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&code_object.executable);
RET_IF_HSA_ERR(err);
err = hsa_executable_load_agent_code_object(
code_object.executable, agent, code_object.code_obj_rdr, nullptr, nullptr);
if(err != HSA_STATUS_SUCCESS) return false;
err = hsa_executable_freeze(code_object.executable, nullptr);
RET_IF_HSA_ERR(err);
return true;
}
bool get_kernel(const CodeObject& code_object,
std::string kernel,
hsa_agent_t agent,
Kernel& kern)
{
hsa_executable_symbol_t symbol;
hsa_status_t err = hsa_executable_get_symbol_by_name(
code_object.executable, kernel.c_str(), &agent, &symbol);
if(err != HSA_STATUS_SUCCESS)
{
err = hsa_executable_get_symbol_by_name(
code_object.executable, (kernel + ".kd").c_str(), &agent, &symbol);
if(err != HSA_STATUS_SUCCESS)
{
return false;
}
}
printf("\nkernel-name: %s\n", kernel.c_str());
err = hsa_executable_symbol_get_info(
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle);
RET_IF_HSA_ERR(err);
return true;
}
// Not for parallel insertion.
bool submit_packet(hsa_queue_t* queue, Aql& pkt)
{
size_t mask = queue->size - 1;
Aql* ring = static_cast<Aql*>(queue->base_address);
uint64_t write = hsa_queue_load_write_index_relaxed(queue);
uint64_t read = hsa_queue_load_read_index_relaxed(queue);
if(write - read + 1 > queue->size) return false;
Aql& dst = ring[write & mask];
uint16_t header = pkt.header.raw;
pkt.header.raw = dst.header.raw;
dst = pkt;
__atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE);
pkt.header.raw = header;
hsa_queue_store_write_index_release(queue, write + 1);
hsa_signal_store_screlease(queue->doorbell_signal, write);
return true;
}
void* hsa_malloc(size_t size, const Device::Memory& mem)
{
void* ret;
hsa_status_t err = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &ret);
RET_IF_HSA_ERR(err);
err = hsa_amd_agents_allow_access(
Device::all_devices.size(), &Device::all_devices[0], nullptr, ret);
RET_IF_HSA_ERR(err);
return ret;
}
void* hsa_malloc(size_t size, const Device& dev, bool fine)
{
uint32_t index = fine ? dev.fine : dev.coarse;
assert(index != -1u && "Memory type unavailable.");
return hsa_malloc(size, dev.pools[index]);
}
bool device_discovery()
{
hsa_status_t err;
err = hsa_iterate_agents(
[](hsa_agent_t agent, void*) {
hsa_status_t error;
Device dev;
dev.agent = agent;
dev.fine = -1u;
dev.coarse = -1u;
error = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, dev.name);
RET_IF_HSA_ERR(error)
hsa_device_type_t type;
error = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
RET_IF_HSA_ERR(error)
error = hsa_amd_agent_iterate_memory_pools(
agent,
[](hsa_amd_memory_pool_t pool, void* data) {
std::vector<Device::Memory>& pools =
*reinterpret_cast<std::vector<Device::Memory>*>(data);
hsa_status_t status;
hsa_amd_segment_t segment;
status = hsa_amd_memory_pool_get_info(
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
RET_IF_HSA_ERR(status)
if(segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;
uint32_t flags;
status = hsa_amd_memory_pool_get_info(
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
RET_IF_HSA_ERR(status)
Device::Memory mem;
mem.pool = pool;
mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);
status = hsa_amd_memory_pool_get_info(
pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
RET_IF_HSA_ERR(status)
status = hsa_amd_memory_pool_get_info(
pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule);
RET_IF_HSA_ERR(status)
pools.push_back(mem);
return HSA_STATUS_SUCCESS;
},
static_cast<void*>(&dev.pools));
if(!dev.pools.empty())
{
for(size_t i = 0; i < dev.pools.size(); i++)
{
if(dev.pools[i].fine && dev.pools[i].kernarg && dev.fine == -1u)
dev.fine = i;
if(dev.pools[i].fine && !dev.pools[i].kernarg) dev.fine = i;
if(!dev.pools[i].fine) dev.coarse = i;
}
if(type == HSA_DEVICE_TYPE_CPU)
cpu.push_back(dev);
else
gpu.push_back(dev);
Device::all_devices.push_back(dev.agent);
}
return HSA_STATUS_SUCCESS;
},
nullptr);
[]() {
for(auto& dev : cpu)
{
for(auto& mem : dev.pools)
{
if(mem.fine && mem.kernarg)
{
kernarg = mem;
return;
}
}
}
}();
RET_IF_HSA_ERR(err);
if(cpu.empty() || gpu.empty() || kernarg.pool.handle == 0) return false;
return true;
}
void search_hasco(const fs::path& directory, std::string& filename)
{
for(const auto& entry : fs::directory_iterator(directory))
{
if(fs::is_regular_file(entry))
{
if(entry.path().filename() == filename)
{
filename = entry.path();
}
}
else if(fs::is_directory(entry))
{
search_hasco(entry, filename); // Recursive call for subdirectories
}
}
}
};
-5
Просмотреть файл
@@ -39,8 +39,3 @@ target_compile_options(multistream PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow
find_package(Threads REQUIRED)
target_link_libraries(multistream PRIVATE Threads::Threads)
install(
TARGETS multistream
DESTINATION bin
COMPONENT tests)
-5
Просмотреть файл
@@ -52,8 +52,3 @@ if(REPRODUCIBLE_RUNTIME_USE_MPI)
target_compile_definitions(reproducible-runtime PRIVATE USE_MPI)
target_link_libraries(reproducible-runtime PRIVATE MPI::MPI_C)
endif()
install(
TARGETS reproducible-runtime
DESTINATION bin
COMPONENT tests)
-5
Просмотреть файл
@@ -40,8 +40,3 @@ target_compile_options(scratch-memory PRIVATE -W -Wall -Wextra -Wpedantic -Wshad
find_package(Threads REQUIRED)
target_link_libraries(scratch-memory PRIVATE Threads::Threads hsa-runtime64)
install(
TARGETS scratch-memory
DESTINATION bin
COMPONENT tests)
-5
Просмотреть файл
@@ -44,8 +44,3 @@ target_link_libraries(simple-transpose PRIVATE Threads::Threads)
find_package(rocprofiler-sdk-roctx REQUIRED)
target_link_libraries(simple-transpose
PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx)
install(
TARGETS simple-transpose
DESTINATION bin
COMPONENT tests)
-5
Просмотреть файл
@@ -50,8 +50,3 @@ if(TRANSPOSE_USE_MPI)
target_compile_definitions(transpose PRIVATE USE_MPI)
target_link_libraries(transpose PRIVATE MPI::MPI_C)
endif()
install(
TARGETS transpose
DESTINATION bin
COMPONENT tests)
-5
Просмотреть файл
@@ -39,8 +39,3 @@ target_compile_options(vector-ops PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow -
find_package(Threads REQUIRED)
target_link_libraries(vector-ops PRIVATE Threads::Threads)
install(
TARGETS vector-ops
DESTINATION bin
COMPONENT tests)
+18
Просмотреть файл
@@ -11,6 +11,24 @@ set(ROCPROFILER_DEFAULT_FAIL_REGEX
"threw an exception|Permission denied|Could not create logging file"
CACHE STRING "Default FAIL_REGULAR_EXPRESSION for tests")
set(DEFAULT_GPU_TARGETS
"gfx900"
"gfx906"
"gfx908"
"gfx90a"
"gfx940"
"gfx941"
"gfx942"
"gfx1030"
"gfx1010"
"gfx1100"
"gfx1101"
"gfx1102")
set(GPU_TARGETS
"${DEFAULT_GPU_TARGETS}"
CACHE STRING "GPU targets to compile for")
# build flags
add_library(rocprofiler-tests-build-flags INTERFACE)
add_library(rocprofiler::tests-build-flags ALIAS rocprofiler-tests-build-flags)
-5
Просмотреть файл
@@ -54,8 +54,3 @@ if(TRANSPOSE_USE_MPI)
target_compile_definitions(transpose-shared-library PRIVATE USE_MPI)
target_link_libraries(transpose-shared-library PRIVATE MPI::MPI_C)
endif()
install(
TARGETS transpose-shared-library
DESTINATION lib
COMPONENT tests)
-5
Просмотреть файл
@@ -42,8 +42,3 @@ set_target_properties(vector-ops-shared-library PROPERTIES OUTPUT_NAME vector-op
find_package(Threads REQUIRED)
target_link_libraries(vector-ops-shared-library PRIVATE Threads::Threads)
install(
TARGETS vector-ops-shared-library
DESTINATION lib
COMPONENT tests)
+1
Просмотреть файл
@@ -27,3 +27,4 @@ add_subdirectory(tracing)
add_subdirectory(tracing-plus-cc)
add_subdirectory(tracing-hip-in-libraries)
add_subdirectory(counter-collection)
add_subdirectory(hsa-queue-dependency)
+67
Просмотреть файл
@@ -0,0 +1,67 @@
#
# rocprofv3 tool test
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-rocprofv3-hsa-queue-tracing
LANGUAGES CXX
VERSION 0.0.0)
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
set(LOG_LEVEL "warning") # info produces memory leak
else()
set(LOG_LEVEL "info")
endif()
set(tracing-env
"${PRELOAD_ENV}" "ROCPROF_LOG_LEVEL=${LOG_LEVEL}"
"ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}"
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
foreach(FILENAME validate.py conftest.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}
${CMAKE_CURRENT_BINARY_DIR}/${FILENAME} COPYONLY)
endforeach()
find_package(rocprofiler-sdk REQUIRED)
# hsa multiqueue dependency test
add_test(
NAME rocprofv3-test-hsa-multiqueue-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --hsa-trace --kernel-trace -d
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace -o out $<TARGET_FILE:multiqueue_testapp>)
set_tests_properties(
rocprofv3-test-hsa-multiqueue-execute
PROPERTIES LABELS "integration-tests" ENVIRONMENT "${tracing-env}"
FAIL_REGULAR_EXPRESSION "HSA_API|HIP_API")
add_test(
NAME rocprofv3-test-hsa-multiqueue-validate
COMMAND
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --hsa-trace-input
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_hsa_api_trace.csv
--kernel-trace-input
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_kernel_trace.csv)
set(MULTIQUEUE_VALIDATION_FILES
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_hsa_api_trace.csv
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_kernel_api_trace.csv)
set_tests_properties(
rocprofv3-test-hsa-multiqueue-validate
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
rocprofv3-test-hsa-multiqueue-execute
FAIL_REGULAR_EXPRESSION
"AssertionError"
ATTACHED_FILES_ON_FAIL
"${MULTIQUEUE_VALIDATION_FILES}")
+41
Просмотреть файл
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import csv
import pytest
def pytest_addoption(parser):
parser.addoption(
"--hsa-trace-input",
action="store",
help="Path to HSA API tracing CSV file.",
)
parser.addoption(
"--kernel-trace-input",
action="store",
help="Path to Kernel API tracing CSV file.",
)
@pytest.fixture
def hsa_trace_input_data(request):
filename = request.config.getoption("--hsa-trace-input")
data = []
with open(filename, "r") as inp:
reader = csv.DictReader(inp)
for row in reader:
data.append(row)
return data
@pytest.fixture
def kernel_trace_input_data(request):
filename = request.config.getoption("--kernel-trace-input")
data = []
with open(filename, "r") as inp:
reader = csv.DictReader(inp)
for row in reader:
data.append(row)
return data
+70
Просмотреть файл
@@ -0,0 +1,70 @@
#!/usr/bin/env python3
import sys
import pytest
def test_hsa_api_trace(hsa_trace_input_data):
functions = []
correlation_ids = []
for row in hsa_trace_input_data:
assert row["Domain"] in (
"HSA_CORE_API",
"HSA_AMD_EXT_API",
"HSA_IMAGE_EXT_API",
"HSA_FINALIZE_EXT_API",
)
assert int(row["Process_Id"]) > 0
assert int(row["Thread_Id"]) >= int(row["Process_Id"])
assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"])
functions.append(row["Function"])
correlation_ids.append(int(row["Correlation_Id"]))
correlation_ids = sorted(list(set(correlation_ids)))
# deterministic call counts
num_queue_create_calls = 2
num_queue_destroy_calls = 2
num_hsa_mem_free_calls = 2
# signal create/destroy calls
# although the app explicitly only creates 3 signals
# but hsa_init() internally calls hsa_signal_create
num_hsa_signal_create_calls = 4
num_hsa_signal_destroy_calls = 4
# all correlation ids are unique
assert len(correlation_ids) == len(hsa_trace_input_data)
functions = list(functions)
assert "hsa_shut_down" in functions
assert functions.count("hsa_queue_create") == num_queue_create_calls
assert functions.count("hsa_queue_destroy") == num_queue_destroy_calls
assert functions.count("hsa_memory_free") == num_hsa_mem_free_calls
assert functions.count("hsa_signal_create") == num_hsa_signal_create_calls
assert functions.count("hsa_signal_destroy") == num_hsa_signal_destroy_calls
def test_kernel_trace(kernel_trace_input_data):
valid_kernel_names = ("copyA", "copyB", "copyC")
assert len(kernel_trace_input_data) == 3
for row in kernel_trace_input_data:
assert row["Kind"] == "KERNEL_DISPATCH"
assert int(row["Agent_Id"]) > 0
assert int(row["Queue_Id"]) > 0
assert int(row["Kernel_Id"]) > 0
assert row["Kernel_Name"] in valid_kernel_names
assert int(row["Correlation_Id"]) > 0
assert int(row["Workgroup_Size_X"]) == 64
assert int(row["Workgroup_Size_Y"]) == 1
assert int(row["Workgroup_Size_Z"]) == 1
assert int(row["Grid_Size_X"]) == 64
assert int(row["Grid_Size_Y"]) == 1
assert int(row["Grid_Size_Z"]) == 1
assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"])
if __name__ == "__main__":
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
sys.exit(exit_code)
-5
Просмотреть файл
@@ -25,11 +25,6 @@ set_target_properties(
${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}
INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..")
install(
TARGETS rocprofiler-sdk-json-tool
DESTINATION lib/rocprofiler-sdk
COMPONENT tests)
# tool library which just checks that tools can be compiled with C language
add_library(rocprofiler-sdk-c-tool SHARED)
target_sources(rocprofiler-sdk-c-tool PRIVATE c-tool.c)