hsa multiqueue application (#618)
* hsa multiqueuw application
* cmake formatting (cmake-format) (#619)
Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>
* source formatting (clang-format v11) (#620)
Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>
* comppialtion fix
* Update tests/bin/CMakeLists.txt
Reorder `add_subdirectory` to fix (recurrent) issues with ROCTx in `CMAKE_BUILD_RPATH`
* addressing early feedback
* cmake updates
* more cmake updates
* adding queue dependency test
* updating test
* test updates
* removed hsa_api_trace header
* reformating headers to prevent clang from reordering
* Fixing packaging
* Fixes for hangs
* source formatting (clang-format v11) (#676)
Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com>
* cmake formatting (cmake-format) (#673)
Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com>
* structure change
* cmake formatting (cmake-format) (#680)
Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>
* Adding kernel trace to test hang fix
* rebased and fixed kernel-trace test
* rhel clang fixes
* source formatting (clang-format v11) (#685)
Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>
* Update lib/rocprofiler-sdk-tool/helper.hpp
- remove suppression of -Wshadow
* Update tests/bin/hsa-queue-dependency/CMakeLists.txt
- cleanup unnecessary code
- GPU_LIST -> GPU_TARGETS
* GPU_LIST -> GPU_TARGETS
* Remove installation of test executable and libraries
---------
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: bgopesh <7112102+bgopesh@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Co-authored-by: Benjamin Welton <bewelton@amd.com>
Co-authored-by: bwelton <1683479+bwelton@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: 348d740388]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
73ff4f2502
Коммит
6076c751a3
+3
-3
@@ -22,9 +22,9 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
|
||||
ROCM_PATH: "/opt/rocm"
|
||||
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
|
||||
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
|
||||
PATH: "/usr/bin:$PATH"
|
||||
INCLUDED_TESTS: ".*pc_sampling.*"
|
||||
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
|
||||
+7
-7
@@ -17,9 +17,9 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
|
||||
ROCM_PATH: "/opt/rocm"
|
||||
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
|
||||
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
|
||||
PATH: "/usr/bin:$PATH"
|
||||
EXCLUDED_TESTS: ".*pc_sampling.*"
|
||||
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
--run-attempt ${{ github.run_attempt }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
@@ -210,7 +210,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
--coverage all
|
||||
--run-attempt ${{ github.run_attempt }}
|
||||
--
|
||||
@@ -228,7 +228,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov-tests
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
--coverage tests
|
||||
--run-attempt ${{ github.run_attempt }}
|
||||
--
|
||||
@@ -246,7 +246,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-codecov-samples
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
--coverage samples
|
||||
--run-attempt ${{ github.run_attempt }}
|
||||
--
|
||||
@@ -403,7 +403,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.sanitizer }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
--memcheck ${{ matrix.sanitizer }}
|
||||
--run-attempt ${{ github.run_attempt }}
|
||||
${{ matrix.ci-flags }}
|
||||
|
||||
поставляемый
+12
-12
@@ -9,9 +9,9 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
|
||||
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_TARGETS, etc. with internal handling in cmake
|
||||
ROCM_PATH: "/opt/rocm"
|
||||
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
|
||||
GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
|
||||
PATH: "/usr/bin:$PATH"
|
||||
EXCLUDED_TESTS: ".*pc_sampling.*"
|
||||
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
@@ -147,7 +147,7 @@ jobs:
|
||||
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
@@ -177,7 +177,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
@@ -246,7 +246,7 @@ jobs:
|
||||
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
@@ -276,7 +276,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
@@ -345,7 +345,7 @@ jobs:
|
||||
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
@@ -375,7 +375,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
@@ -444,7 +444,7 @@ jobs:
|
||||
body: "Rerun Started!, Please check https://github.com/${{github.repository}}/commit/${{github.sha}}/checks/${{github.run_id}} for more details."
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install requirements
|
||||
@@ -474,7 +474,7 @@ jobs:
|
||||
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}${{ matrix.name-tag }}
|
||||
--build-jobs 16
|
||||
--site $(echo $RUNNER_HOSTNAME)-$(/opt/rocm/bin/rocm_agent_enumerator | sed -n '2 p')
|
||||
--gpu-targets ${{ env.GPU_LIST }}
|
||||
--gpu-targets ${{ env.GPU_TARGETS }}
|
||||
${{ matrix.ci-flags }}
|
||||
--
|
||||
-DROCPROFILER_DEP_ROCMCORE=ON
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# ROCprofiler-SDK: Application Profiling, Tracing, and Performance Analysis
|
||||
|
||||
***
|
||||
Note: rocprofiler-sdk is currently `not` supported as part of public ROCm software stack and only distributed as a beta
|
||||
Note: rocprofiler-sdk is currently `not` supported as part of public ROCm software stack and only distributed as a beta
|
||||
release to customers.
|
||||
***
|
||||
|
||||
@@ -38,7 +38,7 @@ It can be set by the user in different location if needed.
|
||||
## Build and Installation
|
||||
|
||||
```bash
|
||||
git clone https://git@github.com:ROCm/rocprofiler-sdk-internal.git rocprofiler-sdk-source
|
||||
git clone https://git@github.com:ROCm/rocprofiler-sdk-internal.git rocprofiler-sdk-source
|
||||
cmake \
|
||||
-B rocprofiler-sdk-build \
|
||||
-D ROCPROFILER_BUILD_TESTS=ON \
|
||||
@@ -47,7 +47,7 @@ cmake \
|
||||
-D CMAKE_INSTALL_PREFIX=/opt/rocm \
|
||||
rocprofiler-sdk-source
|
||||
|
||||
cmake --build rocprofiler-sdk-build --target all --parallel 8
|
||||
cmake --build rocprofiler-sdk-build --target all --parallel 8
|
||||
```
|
||||
|
||||
To install ROCprofiler, run:
|
||||
|
||||
@@ -5,6 +5,14 @@ cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(rocprofiler-tests-bin LANGUAGES C CXX)
|
||||
|
||||
set(CMAKE_BUILD_RPATH
|
||||
"\$ORIGIN:\$ORIGIN/../lib:$<TARGET_FILE_DIR:rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library>"
|
||||
)
|
||||
|
||||
# applications used by integration tests which DO link to rocprofiler-sdk-roctx
|
||||
add_subdirectory(reproducible-runtime)
|
||||
add_subdirectory(transpose)
|
||||
|
||||
set(CMAKE_BUILD_RPATH "\$ORIGIN:\$ORIGIN/../lib")
|
||||
|
||||
# applications used by integration tests which DO NOT link to rocprofiler-sdk-roctx
|
||||
@@ -13,11 +21,4 @@ add_subdirectory(multistream)
|
||||
add_subdirectory(vector-operations)
|
||||
add_subdirectory(hip-in-libraries)
|
||||
add_subdirectory(scratch-memory)
|
||||
|
||||
set(CMAKE_BUILD_RPATH
|
||||
"\$ORIGIN:\$ORIGIN/../lib:$<TARGET_FILE_DIR:rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library>"
|
||||
)
|
||||
|
||||
# applications used by integration tests which DO link to rocprofiler-sdk-roctx
|
||||
add_subdirectory(reproducible-runtime)
|
||||
add_subdirectory(transpose)
|
||||
add_subdirectory(hsa-queue-dependency)
|
||||
|
||||
@@ -27,8 +27,3 @@ if(TRANSPOSE_USE_MPI)
|
||||
target_compile_definitions(hip-in-libraries PRIVATE USE_MPI)
|
||||
target_link_libraries(hip-in-libraries PRIVATE MPI::MPI_C)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS hip-in-libraries
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
#
|
||||
#
|
||||
# HSA multi-queue dependency test
|
||||
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(rocprofiler-tests-bin-hsa-multiqueue-dependency LANGUAGES CXX)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE REQUIRED
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
|
||||
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
|
||||
separate_arguments(
|
||||
CLANG_ARG_LIST
|
||||
UNIX_COMMAND
|
||||
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1
|
||||
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
COMMAND ${amdclangpp_EXECUTABLE} ${CLANG_ARG_LIST}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
|
||||
OUTPUT ${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -E copy
|
||||
${CMAKE_BINARY_DIR}/tests/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
|
||||
${CMAKE_BINARY_DIR}/rocprofv3/hsa-queue-dependency/${OUTPUT_FILE}
|
||||
COMMENT "Building ${OUTPUT_FILE}...")
|
||||
set(HSACO_TARGET_LIST
|
||||
${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
|
||||
PARENT_SCOPE)
|
||||
endfunction(generate_hsaco)
|
||||
|
||||
foreach(target_id ${GPU_TARGETS})
|
||||
# generate kernel bitcodes
|
||||
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/copy.cl
|
||||
${target_id}_copy.hsaco)
|
||||
endforeach(target_id)
|
||||
|
||||
add_custom_target(generate_hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
|
||||
|
||||
add_executable(multiqueue_testapp)
|
||||
target_sources(multiqueue_testapp PRIVATE multiqueue_app.cpp)
|
||||
target_compile_options(multiqueue_testapp PRIVATE -W -Wall -Wextra -Wshadow -Werror)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(multiqueue_testapp PRIVATE stdc++fs Threads::Threads)
|
||||
|
||||
find_package(
|
||||
amd_comgr
|
||||
REQUIRED
|
||||
CONFIG
|
||||
HINTS
|
||||
${CMAKE_INSTALL_PREFIX}
|
||||
PATHS
|
||||
${ROCM_PATH}
|
||||
PATH_SUFFIXES
|
||||
lib/cmake/amd_comgr)
|
||||
|
||||
target_link_libraries(multiqueue_testapp PRIVATE amd_comgr)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
target_link_libraries(multiqueue_testapp PRIVATE rocprofiler::rocprofiler
|
||||
rocprofiler::tests-common-library)
|
||||
|
||||
target_compile_definitions(multiqueue_testapp PUBLIC AMD_INTERNAL_BUILD=1)
|
||||
|
||||
find_package(
|
||||
hsa-runtime64
|
||||
REQUIRED
|
||||
CONFIG
|
||||
HINTS
|
||||
${rocm_version_DIR}
|
||||
${ROCM_PATH}
|
||||
PATHS
|
||||
${rocm_version_DIR}
|
||||
${ROCM_PATH})
|
||||
|
||||
target_link_libraries(multiqueue_testapp PRIVATE hsa-runtime64::hsa-runtime64)
|
||||
|
||||
add_dependencies(multiqueue_testapp generate_hsaco_targets)
|
||||
@@ -0,0 +1,32 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
__kernel void copyA(__global unsigned int* a, __global unsigned int* b) {
|
||||
uint tid = get_global_id(0);
|
||||
a[tid] = b[tid];
|
||||
}
|
||||
__kernel void copyB(__global unsigned int* a, __global unsigned int* b) {
|
||||
uint tid = get_global_id(0);
|
||||
a[tid] = b[tid];
|
||||
}
|
||||
__kernel void copyC(__global unsigned int* a, __global unsigned int* b) {
|
||||
uint tid = get_global_id(0);
|
||||
a[tid] = b[tid];
|
||||
}
|
||||
@@ -0,0 +1,289 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
/** ROC Profiler Multi Queue Dependency Test
|
||||
*
|
||||
* The goal of this test is to ensure ROC profiler does not go to deadlock
|
||||
* when multiple queue are created and they are dependent on each other
|
||||
*
|
||||
*/
|
||||
|
||||
#include "multiqueue_app.h"
|
||||
|
||||
std::vector<hsa_agent_t> Device::all_devices;
|
||||
std::vector<Device> MQDependencyTest::cpu;
|
||||
std::vector<Device> MQDependencyTest::gpu;
|
||||
Device::Memory MQDependencyTest::kernarg;
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
hsa_status_t status;
|
||||
MQDependencyTest obj;
|
||||
|
||||
// Get Agent info
|
||||
obj.device_discovery();
|
||||
|
||||
char agent_name[64];
|
||||
status = hsa_agent_get_info(obj.gpu[0].agent, HSA_AGENT_INFO_NAME, agent_name);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Getting hasco Path
|
||||
std::string hasco_file_path = std::string(agent_name) + std::string("_copy.hsaco");
|
||||
obj.search_hasco(fs::current_path(), hasco_file_path);
|
||||
|
||||
MQDependencyTest::CodeObject code_object;
|
||||
if(!obj.load_code_object(hasco_file_path, obj.gpu[0].agent, code_object))
|
||||
{
|
||||
printf("Kernel file not found or not usable with given agent.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
MQDependencyTest::Kernel copyA;
|
||||
if(!obj.get_kernel(code_object, "copyA", obj.gpu[0].agent, copyA))
|
||||
{
|
||||
printf("Test kernel A not found.\n");
|
||||
abort();
|
||||
}
|
||||
MQDependencyTest::Kernel copyB;
|
||||
if(!obj.get_kernel(code_object, "copyB", obj.gpu[0].agent, copyB))
|
||||
{
|
||||
printf("Test kernel B not found.\n");
|
||||
abort();
|
||||
}
|
||||
MQDependencyTest::Kernel copyC;
|
||||
if(!obj.get_kernel(code_object, "copyC", obj.gpu[0].agent, copyC))
|
||||
{
|
||||
printf("Test kernel C not found.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
struct args_t
|
||||
{
|
||||
uint32_t* a;
|
||||
uint32_t* b;
|
||||
MQDependencyTest::OCLHiddenArgs hidden;
|
||||
};
|
||||
|
||||
args_t* args;
|
||||
args = static_cast<args_t*>(obj.hsa_malloc(sizeof(args_t), obj.kernarg));
|
||||
memset(args, 0, sizeof(args_t));
|
||||
|
||||
uint32_t* a = static_cast<uint32_t*>(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg));
|
||||
uint32_t* b = static_cast<uint32_t*>(obj.hsa_malloc(64 * sizeof(uint32_t), obj.kernarg));
|
||||
|
||||
memset(a, 0, 64 * sizeof(uint32_t));
|
||||
memset(b, 1, 64 * sizeof(uint32_t));
|
||||
|
||||
// Create queue in gpu agent and prepare a kernel dispatch packet
|
||||
hsa_queue_t* queue1;
|
||||
status = hsa_queue_create(
|
||||
obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue1);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Create a signal with a value of 1 and attach it to the first kernel
|
||||
// dispatch packet
|
||||
hsa_signal_t completion_signal_1;
|
||||
status = hsa_signal_create(1, 0, NULL, &completion_signal_1);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// First dispath packet on queue 1, Kernel A
|
||||
{
|
||||
MQDependencyTest::Aql packet{};
|
||||
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.dispatch.setup = 1;
|
||||
packet.dispatch.workgroup_size_x = 64;
|
||||
packet.dispatch.workgroup_size_y = 1;
|
||||
packet.dispatch.workgroup_size_z = 1;
|
||||
packet.dispatch.grid_size_x = 64;
|
||||
packet.dispatch.grid_size_y = 1;
|
||||
packet.dispatch.grid_size_z = 1;
|
||||
|
||||
packet.dispatch.group_segment_size = copyA.group;
|
||||
packet.dispatch.private_segment_size = copyA.scratch;
|
||||
packet.dispatch.kernel_object = copyA.handle;
|
||||
|
||||
packet.dispatch.kernarg_address = args;
|
||||
packet.dispatch.completion_signal = completion_signal_1;
|
||||
|
||||
args->a = a;
|
||||
args->b = b;
|
||||
// Tell packet processor of A to launch the first kernel dispatch packet
|
||||
obj.submit_packet(queue1, packet);
|
||||
}
|
||||
|
||||
// Create a signal with a value of 1 and attach it to the second kernel
|
||||
// dispatch packet
|
||||
hsa_signal_t completion_signal_2;
|
||||
status = hsa_signal_create(1, 0, NULL, &completion_signal_2);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
hsa_signal_t completion_signal_3;
|
||||
status = hsa_signal_create(1, 0, NULL, &completion_signal_3);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Create barrier-AND packet that is enqueued in queue 1
|
||||
{
|
||||
MQDependencyTest::Aql packet{};
|
||||
packet.header.type = HSA_PACKET_TYPE_BARRIER_AND;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.barrier_and.dep_signal[0] = completion_signal_2;
|
||||
obj.submit_packet(queue1, packet);
|
||||
}
|
||||
|
||||
// Second dispath packet on queue 1, Kernel C
|
||||
{
|
||||
MQDependencyTest::Aql packet{};
|
||||
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.dispatch.setup = 1;
|
||||
packet.dispatch.workgroup_size_x = 64;
|
||||
packet.dispatch.workgroup_size_y = 1;
|
||||
packet.dispatch.workgroup_size_z = 1;
|
||||
packet.dispatch.grid_size_x = 64;
|
||||
packet.dispatch.grid_size_y = 1;
|
||||
packet.dispatch.grid_size_z = 1;
|
||||
|
||||
packet.dispatch.group_segment_size = copyC.group;
|
||||
packet.dispatch.private_segment_size = copyC.scratch;
|
||||
packet.dispatch.kernel_object = copyC.handle;
|
||||
packet.dispatch.completion_signal = completion_signal_3;
|
||||
packet.dispatch.kernarg_address = args;
|
||||
|
||||
args->a = a;
|
||||
args->b = b;
|
||||
// Tell packet processor to launch the second kernel dispatch packet
|
||||
obj.submit_packet(queue1, packet);
|
||||
}
|
||||
|
||||
// Create queue 2
|
||||
hsa_queue_t* queue2;
|
||||
status = hsa_queue_create(
|
||||
obj.gpu[0].agent, 1024, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue2);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
// Create barrier-AND packet that is enqueued in queue 2
|
||||
{
|
||||
MQDependencyTest::Aql packet{};
|
||||
packet.header.type = HSA_PACKET_TYPE_BARRIER_AND;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.barrier_and.dep_signal[0] = completion_signal_1;
|
||||
obj.submit_packet(queue2, packet);
|
||||
}
|
||||
|
||||
// Third dispath packet on queue 2, Kernel B
|
||||
{
|
||||
MQDependencyTest::Aql packet{};
|
||||
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.dispatch.setup = 1;
|
||||
packet.dispatch.workgroup_size_x = 64;
|
||||
packet.dispatch.workgroup_size_y = 1;
|
||||
packet.dispatch.workgroup_size_z = 1;
|
||||
packet.dispatch.grid_size_x = 64;
|
||||
packet.dispatch.grid_size_y = 1;
|
||||
packet.dispatch.grid_size_z = 1;
|
||||
|
||||
packet.dispatch.group_segment_size = copyB.group;
|
||||
packet.dispatch.private_segment_size = copyB.scratch;
|
||||
packet.dispatch.kernel_object = copyB.handle;
|
||||
|
||||
packet.dispatch.kernarg_address = args;
|
||||
packet.dispatch.completion_signal = completion_signal_2;
|
||||
|
||||
args->a = a;
|
||||
args->b = b;
|
||||
// Tell packet processor to launch the third kernel dispatch packet
|
||||
obj.submit_packet(queue2, packet);
|
||||
}
|
||||
|
||||
// Wait on the completion signal
|
||||
hsa_signal_wait_relaxed(
|
||||
completion_signal_1, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Wait on the completion signal
|
||||
hsa_signal_wait_relaxed(
|
||||
completion_signal_2, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Wait on the completion signal
|
||||
hsa_signal_wait_relaxed(
|
||||
completion_signal_3, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
for(int i = 0; i < 64; i++)
|
||||
{
|
||||
if(a[i] != b[i])
|
||||
{
|
||||
printf("error at %d: expected %d, got %d\n", i, b[i], a[i]);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Clearing data structures and memory
|
||||
status = hsa_signal_destroy(completion_signal_1);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
status = hsa_signal_destroy(completion_signal_2);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
status = hsa_signal_destroy(completion_signal_3);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
if(queue1 != nullptr)
|
||||
{
|
||||
status = hsa_queue_destroy(queue1);
|
||||
RET_IF_HSA_ERR(status)
|
||||
}
|
||||
|
||||
if(queue2 != nullptr)
|
||||
{
|
||||
status = hsa_queue_destroy(queue2);
|
||||
RET_IF_HSA_ERR(status)
|
||||
}
|
||||
|
||||
status = hsa_memory_free(a);
|
||||
RET_IF_HSA_ERR(status)
|
||||
status = hsa_memory_free(b);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
status = hsa_executable_destroy(code_object.executable);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
status = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
|
||||
RET_IF_HSA_ERR(status)
|
||||
close(code_object.file);
|
||||
}
|
||||
@@ -0,0 +1,368 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/filesystem.hpp"
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace fs = common::fs;
|
||||
|
||||
#define RET_IF_HSA_ERR(err) \
|
||||
{ \
|
||||
if((err) != HSA_STATUS_SUCCESS) \
|
||||
{ \
|
||||
char err_val[12]; \
|
||||
char* err_str = nullptr; \
|
||||
if(hsa_status_string(err, (const char**) &err_str) != HSA_STATUS_SUCCESS) \
|
||||
{ \
|
||||
sprintf(&(err_val[0]), "%#x", (uint32_t) err); \
|
||||
err_str = &(err_val[0]); \
|
||||
} \
|
||||
printf("hsa api call failure at: %s:%d\n", __FILE__, __LINE__); \
|
||||
printf("Call returned %s\n", err_str); \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
|
||||
struct Device
|
||||
{
|
||||
struct Memory
|
||||
{
|
||||
hsa_amd_memory_pool_t pool;
|
||||
bool fine;
|
||||
bool kernarg;
|
||||
size_t size;
|
||||
size_t granule;
|
||||
};
|
||||
|
||||
hsa_agent_t agent;
|
||||
char name[64];
|
||||
std::vector<Memory> pools;
|
||||
uint32_t fine;
|
||||
uint32_t coarse;
|
||||
static std::vector<hsa_agent_t> all_devices;
|
||||
};
|
||||
|
||||
class MQDependencyTest
|
||||
{
|
||||
public:
|
||||
MQDependencyTest() { hsa_init(); }
|
||||
~MQDependencyTest() { hsa_shut_down(); }
|
||||
|
||||
static std::vector<Device> cpu;
|
||||
static std::vector<Device> gpu;
|
||||
static Device::Memory kernarg;
|
||||
|
||||
struct CodeObject
|
||||
{
|
||||
hsa_file_t file;
|
||||
hsa_code_object_reader_t code_obj_rdr;
|
||||
hsa_executable_t executable;
|
||||
};
|
||||
|
||||
struct Kernel
|
||||
{
|
||||
uint64_t handle;
|
||||
uint32_t scratch;
|
||||
uint32_t group;
|
||||
uint32_t kernarg_size;
|
||||
uint32_t kernarg_align;
|
||||
};
|
||||
|
||||
union AqlHeader
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint16_t type : 8;
|
||||
uint16_t barrier : 1;
|
||||
uint16_t acquire : 2;
|
||||
uint16_t release : 2;
|
||||
uint16_t reserved : 3;
|
||||
};
|
||||
uint16_t raw;
|
||||
};
|
||||
|
||||
struct BarrierValue
|
||||
{
|
||||
AqlHeader header;
|
||||
uint8_t AmdFormat;
|
||||
uint8_t reserved;
|
||||
uint32_t reserved1;
|
||||
hsa_signal_t signal;
|
||||
hsa_signal_value_t value;
|
||||
hsa_signal_value_t mask;
|
||||
uint32_t cond;
|
||||
uint32_t reserved2;
|
||||
uint64_t reserved3;
|
||||
uint64_t reserved4;
|
||||
hsa_signal_t completion_signal;
|
||||
};
|
||||
|
||||
union Aql
|
||||
{
|
||||
AqlHeader header;
|
||||
hsa_kernel_dispatch_packet_t dispatch;
|
||||
hsa_barrier_and_packet_t barrier_and;
|
||||
hsa_barrier_or_packet_t barrier_or;
|
||||
BarrierValue barrier_value;
|
||||
};
|
||||
|
||||
struct OCLHiddenArgs
|
||||
{
|
||||
uint64_t offset_x;
|
||||
uint64_t offset_y;
|
||||
uint64_t offset_z;
|
||||
void* printf_buffer;
|
||||
void* enqueue;
|
||||
void* enqueue2;
|
||||
void* multi_grid;
|
||||
};
|
||||
|
||||
bool load_code_object(std::string filename, hsa_agent_t agent, CodeObject& code_object)
|
||||
{
|
||||
hsa_status_t err;
|
||||
code_object.file = open(filename.c_str(), O_RDONLY);
|
||||
if(code_object.file == -1)
|
||||
{
|
||||
fprintf(stderr, "%s:%s\n", "Could not load code object", filename.c_str());
|
||||
abort();
|
||||
return false;
|
||||
}
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr,
|
||||
&code_object.executable);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_load_agent_code_object(
|
||||
code_object.executable, agent, code_object.code_obj_rdr, nullptr, nullptr);
|
||||
if(err != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
err = hsa_executable_freeze(code_object.executable, nullptr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool get_kernel(const CodeObject& code_object,
|
||||
std::string kernel,
|
||||
hsa_agent_t agent,
|
||||
Kernel& kern)
|
||||
{
|
||||
hsa_executable_symbol_t symbol;
|
||||
hsa_status_t err = hsa_executable_get_symbol_by_name(
|
||||
code_object.executable, kernel.c_str(), &agent, &symbol);
|
||||
if(err != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
err = hsa_executable_get_symbol_by_name(
|
||||
code_object.executable, (kernel + ".kd").c_str(), &agent, &symbol);
|
||||
if(err != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
printf("\nkernel-name: %s\n", kernel.c_str());
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Not for parallel insertion.
|
||||
bool submit_packet(hsa_queue_t* queue, Aql& pkt)
|
||||
{
|
||||
size_t mask = queue->size - 1;
|
||||
Aql* ring = static_cast<Aql*>(queue->base_address);
|
||||
|
||||
uint64_t write = hsa_queue_load_write_index_relaxed(queue);
|
||||
uint64_t read = hsa_queue_load_read_index_relaxed(queue);
|
||||
if(write - read + 1 > queue->size) return false;
|
||||
|
||||
Aql& dst = ring[write & mask];
|
||||
|
||||
uint16_t header = pkt.header.raw;
|
||||
pkt.header.raw = dst.header.raw;
|
||||
dst = pkt;
|
||||
__atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE);
|
||||
pkt.header.raw = header;
|
||||
|
||||
hsa_queue_store_write_index_release(queue, write + 1);
|
||||
hsa_signal_store_screlease(queue->doorbell_signal, write);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void* hsa_malloc(size_t size, const Device::Memory& mem)
|
||||
{
|
||||
void* ret;
|
||||
hsa_status_t err = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &ret);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_amd_agents_allow_access(
|
||||
Device::all_devices.size(), &Device::all_devices[0], nullptr, ret);
|
||||
RET_IF_HSA_ERR(err);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* hsa_malloc(size_t size, const Device& dev, bool fine)
|
||||
{
|
||||
uint32_t index = fine ? dev.fine : dev.coarse;
|
||||
assert(index != -1u && "Memory type unavailable.");
|
||||
return hsa_malloc(size, dev.pools[index]);
|
||||
}
|
||||
|
||||
bool device_discovery()
|
||||
{
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void*) {
|
||||
hsa_status_t error;
|
||||
|
||||
Device dev;
|
||||
dev.agent = agent;
|
||||
|
||||
dev.fine = -1u;
|
||||
dev.coarse = -1u;
|
||||
|
||||
error = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, dev.name);
|
||||
RET_IF_HSA_ERR(error)
|
||||
|
||||
hsa_device_type_t type;
|
||||
error = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
RET_IF_HSA_ERR(error)
|
||||
|
||||
error = hsa_amd_agent_iterate_memory_pools(
|
||||
agent,
|
||||
[](hsa_amd_memory_pool_t pool, void* data) {
|
||||
std::vector<Device::Memory>& pools =
|
||||
*reinterpret_cast<std::vector<Device::Memory>*>(data);
|
||||
hsa_status_t status;
|
||||
|
||||
hsa_amd_segment_t segment;
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
if(segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;
|
||||
|
||||
uint32_t flags;
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
Device::Memory mem;
|
||||
mem.pool = pool;
|
||||
mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
|
||||
mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);
|
||||
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
status = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule);
|
||||
RET_IF_HSA_ERR(status)
|
||||
|
||||
pools.push_back(mem);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
static_cast<void*>(&dev.pools));
|
||||
|
||||
if(!dev.pools.empty())
|
||||
{
|
||||
for(size_t i = 0; i < dev.pools.size(); i++)
|
||||
{
|
||||
if(dev.pools[i].fine && dev.pools[i].kernarg && dev.fine == -1u)
|
||||
dev.fine = i;
|
||||
if(dev.pools[i].fine && !dev.pools[i].kernarg) dev.fine = i;
|
||||
if(!dev.pools[i].fine) dev.coarse = i;
|
||||
}
|
||||
|
||||
if(type == HSA_DEVICE_TYPE_CPU)
|
||||
cpu.push_back(dev);
|
||||
else
|
||||
gpu.push_back(dev);
|
||||
|
||||
Device::all_devices.push_back(dev.agent);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr);
|
||||
|
||||
[]() {
|
||||
for(auto& dev : cpu)
|
||||
{
|
||||
for(auto& mem : dev.pools)
|
||||
{
|
||||
if(mem.fine && mem.kernarg)
|
||||
{
|
||||
kernarg = mem;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}();
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
if(cpu.empty() || gpu.empty() || kernarg.pool.handle == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void search_hasco(const fs::path& directory, std::string& filename)
|
||||
{
|
||||
for(const auto& entry : fs::directory_iterator(directory))
|
||||
{
|
||||
if(fs::is_regular_file(entry))
|
||||
{
|
||||
if(entry.path().filename() == filename)
|
||||
{
|
||||
filename = entry.path();
|
||||
}
|
||||
}
|
||||
else if(fs::is_directory(entry))
|
||||
{
|
||||
search_hasco(entry, filename); // Recursive call for subdirectories
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -39,8 +39,3 @@ target_compile_options(multistream PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(multistream PRIVATE Threads::Threads)
|
||||
|
||||
install(
|
||||
TARGETS multistream
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -52,8 +52,3 @@ if(REPRODUCIBLE_RUNTIME_USE_MPI)
|
||||
target_compile_definitions(reproducible-runtime PRIVATE USE_MPI)
|
||||
target_link_libraries(reproducible-runtime PRIVATE MPI::MPI_C)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS reproducible-runtime
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -40,8 +40,3 @@ target_compile_options(scratch-memory PRIVATE -W -Wall -Wextra -Wpedantic -Wshad
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(scratch-memory PRIVATE Threads::Threads hsa-runtime64)
|
||||
|
||||
install(
|
||||
TARGETS scratch-memory
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -44,8 +44,3 @@ target_link_libraries(simple-transpose PRIVATE Threads::Threads)
|
||||
find_package(rocprofiler-sdk-roctx REQUIRED)
|
||||
target_link_libraries(simple-transpose
|
||||
PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx)
|
||||
|
||||
install(
|
||||
TARGETS simple-transpose
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -50,8 +50,3 @@ if(TRANSPOSE_USE_MPI)
|
||||
target_compile_definitions(transpose PRIVATE USE_MPI)
|
||||
target_link_libraries(transpose PRIVATE MPI::MPI_C)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS transpose
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -39,8 +39,3 @@ target_compile_options(vector-ops PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow -
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(vector-ops PRIVATE Threads::Threads)
|
||||
|
||||
install(
|
||||
TARGETS vector-ops
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -11,6 +11,24 @@ set(ROCPROFILER_DEFAULT_FAIL_REGEX
|
||||
"threw an exception|Permission denied|Could not create logging file"
|
||||
CACHE STRING "Default FAIL_REGULAR_EXPRESSION for tests")
|
||||
|
||||
set(DEFAULT_GPU_TARGETS
|
||||
"gfx900"
|
||||
"gfx906"
|
||||
"gfx908"
|
||||
"gfx90a"
|
||||
"gfx940"
|
||||
"gfx941"
|
||||
"gfx942"
|
||||
"gfx1030"
|
||||
"gfx1010"
|
||||
"gfx1100"
|
||||
"gfx1101"
|
||||
"gfx1102")
|
||||
|
||||
set(GPU_TARGETS
|
||||
"${DEFAULT_GPU_TARGETS}"
|
||||
CACHE STRING "GPU targets to compile for")
|
||||
|
||||
# build flags
|
||||
add_library(rocprofiler-tests-build-flags INTERFACE)
|
||||
add_library(rocprofiler::tests-build-flags ALIAS rocprofiler-tests-build-flags)
|
||||
|
||||
@@ -54,8 +54,3 @@ if(TRANSPOSE_USE_MPI)
|
||||
target_compile_definitions(transpose-shared-library PRIVATE USE_MPI)
|
||||
target_link_libraries(transpose-shared-library PRIVATE MPI::MPI_C)
|
||||
endif()
|
||||
|
||||
install(
|
||||
TARGETS transpose-shared-library
|
||||
DESTINATION lib
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -42,8 +42,3 @@ set_target_properties(vector-ops-shared-library PROPERTIES OUTPUT_NAME vector-op
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(vector-ops-shared-library PRIVATE Threads::Threads)
|
||||
|
||||
install(
|
||||
TARGETS vector-ops-shared-library
|
||||
DESTINATION lib
|
||||
COMPONENT tests)
|
||||
|
||||
@@ -27,3 +27,4 @@ add_subdirectory(tracing)
|
||||
add_subdirectory(tracing-plus-cc)
|
||||
add_subdirectory(tracing-hip-in-libraries)
|
||||
add_subdirectory(counter-collection)
|
||||
add_subdirectory(hsa-queue-dependency)
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
#
|
||||
# rocprofv3 tool test
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-rocprofv3-hsa-queue-tracing
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
|
||||
set(LOG_LEVEL "warning") # info produces memory leak
|
||||
else()
|
||||
set(LOG_LEVEL "info")
|
||||
endif()
|
||||
|
||||
set(tracing-env
|
||||
"${PRELOAD_ENV}" "ROCPROF_LOG_LEVEL=${LOG_LEVEL}"
|
||||
"ROCPROFILER_LOG_LEVEL=${LOG_LEVEL}"
|
||||
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler-shared-library>")
|
||||
|
||||
foreach(FILENAME validate.py conftest.py)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${FILENAME} COPYONLY)
|
||||
endforeach()
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
# hsa multiqueue dependency test
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --hsa-trace --kernel-trace -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace -o out $<TARGET_FILE:multiqueue_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-execute
|
||||
PROPERTIES LABELS "integration-tests" ENVIRONMENT "${tracing-env}"
|
||||
FAIL_REGULAR_EXPRESSION "HSA_API|HIP_API")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --hsa-trace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_hsa_api_trace.csv
|
||||
--kernel-trace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_kernel_trace.csv)
|
||||
|
||||
set(MULTIQUEUE_VALIDATION_FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_hsa_api_trace.csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/multiqueue_testapp-trace/out_kernel_api_trace.csv)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
rocprofv3-test-hsa-multiqueue-execute
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
ATTACHED_FILES_ON_FAIL
|
||||
"${MULTIQUEUE_VALIDATION_FILES}")
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import csv
|
||||
import pytest
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--hsa-trace-input",
|
||||
action="store",
|
||||
help="Path to HSA API tracing CSV file.",
|
||||
)
|
||||
parser.addoption(
|
||||
"--kernel-trace-input",
|
||||
action="store",
|
||||
help="Path to Kernel API tracing CSV file.",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def hsa_trace_input_data(request):
|
||||
filename = request.config.getoption("--hsa-trace-input")
|
||||
data = []
|
||||
with open(filename, "r") as inp:
|
||||
reader = csv.DictReader(inp)
|
||||
for row in reader:
|
||||
data.append(row)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def kernel_trace_input_data(request):
|
||||
filename = request.config.getoption("--kernel-trace-input")
|
||||
data = []
|
||||
with open(filename, "r") as inp:
|
||||
reader = csv.DictReader(inp)
|
||||
for row in reader:
|
||||
data.append(row)
|
||||
|
||||
return data
|
||||
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
def test_hsa_api_trace(hsa_trace_input_data):
|
||||
functions = []
|
||||
correlation_ids = []
|
||||
for row in hsa_trace_input_data:
|
||||
assert row["Domain"] in (
|
||||
"HSA_CORE_API",
|
||||
"HSA_AMD_EXT_API",
|
||||
"HSA_IMAGE_EXT_API",
|
||||
"HSA_FINALIZE_EXT_API",
|
||||
)
|
||||
assert int(row["Process_Id"]) > 0
|
||||
assert int(row["Thread_Id"]) >= int(row["Process_Id"])
|
||||
assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"])
|
||||
functions.append(row["Function"])
|
||||
correlation_ids.append(int(row["Correlation_Id"]))
|
||||
|
||||
correlation_ids = sorted(list(set(correlation_ids)))
|
||||
|
||||
# deterministic call counts
|
||||
num_queue_create_calls = 2
|
||||
num_queue_destroy_calls = 2
|
||||
num_hsa_mem_free_calls = 2
|
||||
|
||||
# signal create/destroy calls
|
||||
# although the app explicitly only creates 3 signals
|
||||
# but hsa_init() internally calls hsa_signal_create
|
||||
num_hsa_signal_create_calls = 4
|
||||
num_hsa_signal_destroy_calls = 4
|
||||
|
||||
# all correlation ids are unique
|
||||
assert len(correlation_ids) == len(hsa_trace_input_data)
|
||||
|
||||
functions = list(functions)
|
||||
assert "hsa_shut_down" in functions
|
||||
assert functions.count("hsa_queue_create") == num_queue_create_calls
|
||||
assert functions.count("hsa_queue_destroy") == num_queue_destroy_calls
|
||||
assert functions.count("hsa_memory_free") == num_hsa_mem_free_calls
|
||||
assert functions.count("hsa_signal_create") == num_hsa_signal_create_calls
|
||||
assert functions.count("hsa_signal_destroy") == num_hsa_signal_destroy_calls
|
||||
|
||||
|
||||
def test_kernel_trace(kernel_trace_input_data):
|
||||
valid_kernel_names = ("copyA", "copyB", "copyC")
|
||||
|
||||
assert len(kernel_trace_input_data) == 3
|
||||
for row in kernel_trace_input_data:
|
||||
assert row["Kind"] == "KERNEL_DISPATCH"
|
||||
assert int(row["Agent_Id"]) > 0
|
||||
assert int(row["Queue_Id"]) > 0
|
||||
assert int(row["Kernel_Id"]) > 0
|
||||
assert row["Kernel_Name"] in valid_kernel_names
|
||||
assert int(row["Correlation_Id"]) > 0
|
||||
assert int(row["Workgroup_Size_X"]) == 64
|
||||
assert int(row["Workgroup_Size_Y"]) == 1
|
||||
assert int(row["Workgroup_Size_Z"]) == 1
|
||||
assert int(row["Grid_Size_X"]) == 64
|
||||
assert int(row["Grid_Size_Y"]) == 1
|
||||
assert int(row["Grid_Size_Z"]) == 1
|
||||
assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
@@ -25,11 +25,6 @@ set_target_properties(
|
||||
${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}
|
||||
INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..")
|
||||
|
||||
install(
|
||||
TARGETS rocprofiler-sdk-json-tool
|
||||
DESTINATION lib/rocprofiler-sdk
|
||||
COMPONENT tests)
|
||||
|
||||
# tool library which just checks that tools can be compiled with C language
|
||||
add_library(rocprofiler-sdk-c-tool SHARED)
|
||||
target_sources(rocprofiler-sdk-c-tool PRIVATE c-tool.c)
|
||||
|
||||
Ссылка в новой задаче
Block a user