Add ctests to verify roctx api (#260)

---------

Co-authored-by: David Galiffi <David.Galiffi@amd.com>

[ROCm/rocprofiler-systems commit: 3631362903]
This commit is contained in:
anujshuk-amd
2025-06-25 23:31:04 +05:30
کامیت شده توسط GitHub
والد 5c5de6fe2a
کامیت 0c91a0d8ed
5فایلهای تغییر یافته به همراه406 افزوده شده و 0 حذف شده
@@ -1,3 +1,25 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
project(rocprofiler-systems-examples LANGUAGES C CXX)
@@ -55,3 +77,4 @@ add_subdirectory(trace-time-window)
add_subdirectory(fork)
add_subdirectory(videodecode)
add_subdirectory(jpegdecode)
add_subdirectory(roctx)
@@ -0,0 +1,118 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
project(rocprofiler-systems-roctx-example LANGUAGES CXX)
if(ROCPROFSYS_DISABLE_EXAMPLES)
get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME)
if(
${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES
OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES
)
return()
endif()
endif()
find_package(hip QUIET HINTS ${ROCmVersion_DIR} PATHS ${ROCmVersion_DIR})
find_program(
HIPCC_EXECUTABLE
NAMES hipcc
HINTS ${ROCmVersion_DIR} ${ROCM_PATH}
ENV ROCM_PATH
/opt/rocm
PATHS ${ROCmVersion_DIR} ${ROCM_PATH}
ENV ROCM_PATH
/opt/rocm
NO_CACHE
)
mark_as_advanced(HIPCC_EXECUTABLE)
if(NOT HIPCC_EXECUTABLE)
message(AUTHOR_WARNING "hipcc could not be found. Cannot build roctx target")
return()
endif()
if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
if(
CMAKE_CXX_COMPILER STREQUAL HIPCC_EXECUTABLE
OR "${CMAKE_CXX_COMPILER}" MATCHES "hipcc"
)
set(CMAKE_CXX_COMPILER_IS_HIPCC 1 CACHE BOOL "HIP compiler")
endif()
endif()
if(
(
NOT CMAKE_CXX_COMPILER_IS_HIPCC
OR (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT hip_FOUND)
)
AND (NOT COMMAND rocprofiler_systems_custom_compilation AND NOT HIPCC_EXECUTABLE)
)
message(AUTHOR_WARNING "roctx target could not be built")
return()
endif()
add_executable(roctx roctx.cpp)
target_link_libraries(roctx PRIVATE Threads::Threads)
if(
CMAKE_CXX_COMPILER_ID MATCHES "Clang"
AND NOT CMAKE_CXX_COMPILER_IS_HIPCC
AND NOT HIPCC_EXECUTABLE
)
target_link_libraries(
roctx
PRIVATE
$<TARGET_NAME_IF_EXISTS:rocprofiler-systems::rocprofiler-systems-compile-options>
$<TARGET_NAME_IF_EXISTS:hip::host>
$<TARGET_NAME_IF_EXISTS:hip::device>
)
else()
target_compile_options(roctx PRIVATE -W -Wall)
endif()
# Find the library
find_library(
ROCTX_LIBRARY
NAMES rocprofiler-sdk-roctx
PATHS ${ROCM_PATH}/lib ${ROCM_PATH}/lib64
REQUIRED
)
# Link it to target
target_link_libraries(roctx PRIVATE ${ROCTX_LIBRARY})
if("${CMAKE_BUILD_TYPE}" MATCHES "Release")
target_compile_options(roctx PRIVATE -g1)
endif()
if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE)
# defined in MacroUtilities.cmake
rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET roctx)
endif()
if(ROCPROFSYS_INSTALL_EXAMPLES)
install(TARGETS roctx DESTINATION bin COMPONENT rocprofiler-systems-examples)
endif()
@@ -0,0 +1,164 @@
// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <mutex>
#include <random>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <vector>
// HIP and ROCm profiling headers
#include <hip/hip_runtime.h>
#include <hsa/hsa.h>
#include <rocprofiler-sdk-roctx/roctx.h>
// Define HIP_API_CALL macro for error handling.
#define HIP_API_CALL(CALL) \
{ \
hipError_t error_ = (CALL); \
if(error_ != hipSuccess) \
{ \
auto _hip_api_print_lk = auto_lock_t{ print_lock }; \
fprintf(stderr, "%s:%d :: HIP error : %s\n", __FILE__, __LINE__, \
hipGetErrorString(error_)); \
throw std::runtime_error("hip_api_call"); \
} \
}
namespace
{
using auto_lock_t = std::unique_lock<std::mutex>;
auto print_lock = std::mutex{};
} // namespace
// HIP Kernel Function
__global__ void
hipKernelLaunch(int* data)
{
int idx = threadIdx.x + blockIdx.x * blockDim.x;
data[idx] += 1;
}
// Function to execute GPU workload with ROCTx profiling
void
gpu_workload()
{
// Start a profiling range and push a sub-range for launching the kernel.
uint64_t rangeId = roctxRangeStart("roctxRangeStart_GPU_Compute");
roctxRangePush("roctxRangePush_HIP_Kernel");
const int N = 256;
int* d_data = nullptr;
// Allocate device memory
HIP_API_CALL(hipMalloc(&d_data, N * sizeof(int)));
// Launch the kernel
hipLaunchKernelGGL(HIP_KERNEL_NAME(hipKernelLaunch), dim3(1), dim3(N), 0, 0, d_data);
// Wait for GPU to finish
HIP_API_CALL(hipDeviceSynchronize());
// Free device memory
HIP_API_CALL(hipFree(d_data));
// Pop the sub-range and stop the profiling range
roctxRangePop();
roctxRangeStop(rangeId);
}
// Function executed in a separate thread with ROCTx annotations.
void
roctxThreadFunc()
{
roctxNameOsThread("roctxNameOsThread_New");
roctxMark("roctxMark_Thread_Start");
gpu_workload();
roctxMark("roctxMark_End");
}
void
run_profiling()
{
// Label HIP device and stream
int deviceId{ 0 };
HIP_API_CALL(hipGetDevice(&deviceId));
roctxNameHipDevice("roctxNameHipDevice_device_id", deviceId);
hipStream_t stream = {};
HIP_API_CALL(hipStreamCreate(&stream));
roctxNameHipStream("roctxNameHipStream_hip_stream", stream);
// Insert a marker before the GPU workload
roctxMark("roctxMark_GPU_workload");
// Start a nested profiling range.
roctxRangePush("roctxRangePush_run_profiling");
// Execute GPU workload
gpu_workload();
// Pause profiling steps using ROCTx APIs.
roctx_thread_id_t roctx_tid{}; // Thread identifier structure
roctxGetThreadId(&roctx_tid);
// Set names for OS thread, HSA agent, HIP device and stream.
roctxNameOsThread(std::to_string(roctx_tid).c_str());
// Prepare an hsa_agent_t with roctx thread id as a handle (example usage):
hsa_agent_t hsa_agent = { .handle = roctx_tid };
roctxNameHsaAgent("roctxNameHsaAgent_hsa_agent", &hsa_agent);
roctxNameHipDevice("roctxNameHipDevice_hipdevice", 0);
auto* hip_stream = hipStream_t{};
roctxNameHipStream("roctxNameHipStream_hip_stream", hip_stream);
// Pause ROCTx profiling for the current thread.
roctxProfilerPause(roctx_tid);
roctxMark("roctxMark_RoctxProfilerPause_End");
// Start a separate thread executing additional profiling-annotated work.
std::thread worker(roctxThreadFunc);
worker.join();
// Resume ROCTx profiling.
roctxProfilerResume(roctx_tid);
// End the nested profiling range.
roctxRangePop();
// Insert a marker after execution of workload.
roctxMark("roctxMark_Finished_GPU");
HIP_API_CALL(hipStreamDestroy(stream));
}
int
main()
{
std::cout << "Roctx profiling started!" << std::endl;
run_profiling();
std::cout << "Roctx profiling Completed!" << std::endl;
return 0;
}
@@ -47,5 +47,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-causal-tests.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-python-tests.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-decode-tests.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-nic-perf.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-roctx-tests.cmake)
add_subdirectory(source)
@@ -0,0 +1,100 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# -------------------------------------------------------------------------------------- #
#
# roctx tests
#
# -------------------------------------------------------------------------------------- #
# Ensure ROCPROFSYS_ROCM_DOMAINS is defined
set(_roctx_environment
"${_base_environment}"
"ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,marker_api,kernel_dispatch"
)
rocprofiler_systems_add_test(
# SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE SKIP_RUNTIME
NAME roctx-api
TARGET roctx
GPU ON
ENVIRONMENT "${_roctx_environment}"
)
set(ROCTX_LABEL
roctxMark_GPU_workload
roctxRangePushA
roctxRangePushA
roctxRangeStartA
roctxRangeStartA
roctxRangePush_HIP_Kernel
roctxRangePush_HIP_Kernel
roctxRangeStart_GPU_Compute
roctxRangeStart_GPU_Compute
roctxGetThreadId
roctxMark_RoctxProfilerPause_End
roctxMark_Thread_Start
roctxMark_End
roctxRangePush_run_profiling
roctxMark_Finished_GPU
)
set(ROCTX_COUNT
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
)
set(ROCTX_DEPTH
1
1
0
1
0
1
0
1
0
1
1
0
0
1
1
)
rocprofiler_systems_add_validation_test(
NAME roctx-api-sampling
PERFETTO_METRIC "rocm_marker_api"
PERFETTO_FILE "perfetto-trace.proto"
LABELS "roctx"
ARGS -l ${ROCTX_LABEL} -c ${ROCTX_COUNT} -d ${ROCTX_DEPTH} -p
)