From 0c91a0d8ed162c67be8c13f03b98dc0cc23074b2 Mon Sep 17 00:00:00 2001 From: anujshuk-amd Date: Wed, 25 Jun 2025 23:31:04 +0530 Subject: [PATCH] Add ctests to verify roctx api (#260) --------- Co-authored-by: David Galiffi [ROCm/rocprofiler-systems commit: 36313629031c121e1f27c298d30f53948b3436d2] --- .../examples/CMakeLists.txt | 23 +++ .../examples/roctx/CMakeLists.txt | 118 +++++++++++++ .../examples/roctx/roctx.cpp | 164 ++++++++++++++++++ .../rocprofiler-systems/tests/CMakeLists.txt | 1 + .../tests/rocprof-sys-roctx-tests.cmake | 100 +++++++++++ 5 files changed, 406 insertions(+) create mode 100644 projects/rocprofiler-systems/examples/roctx/CMakeLists.txt create mode 100644 projects/rocprofiler-systems/examples/roctx/roctx.cpp create mode 100644 projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake diff --git a/projects/rocprofiler-systems/examples/CMakeLists.txt b/projects/rocprofiler-systems/examples/CMakeLists.txt index dbc5736c7e..9c29d29dd3 100644 --- a/projects/rocprofiler-systems/examples/CMakeLists.txt +++ b/projects/rocprofiler-systems/examples/CMakeLists.txt @@ -1,3 +1,25 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) project(rocprofiler-systems-examples LANGUAGES C CXX) @@ -55,3 +77,4 @@ add_subdirectory(trace-time-window) add_subdirectory(fork) add_subdirectory(videodecode) add_subdirectory(jpegdecode) +add_subdirectory(roctx) diff --git a/projects/rocprofiler-systems/examples/roctx/CMakeLists.txt b/projects/rocprofiler-systems/examples/roctx/CMakeLists.txt new file mode 100644 index 0000000000..673901133c --- /dev/null +++ b/projects/rocprofiler-systems/examples/roctx/CMakeLists.txt @@ -0,0 +1,118 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR) + +project(rocprofiler-systems-roctx-example LANGUAGES CXX) + +if(ROCPROFSYS_DISABLE_EXAMPLES) + get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME) + + if( + ${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + OR ${_DIR} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES + ) + return() + endif() +endif() + +find_package(hip QUIET HINTS ${ROCmVersion_DIR} PATHS ${ROCmVersion_DIR}) + +find_program( + HIPCC_EXECUTABLE + NAMES hipcc + HINTS ${ROCmVersion_DIR} ${ROCM_PATH} + ENV ROCM_PATH + /opt/rocm + PATHS ${ROCmVersion_DIR} ${ROCM_PATH} + ENV ROCM_PATH + /opt/rocm + NO_CACHE +) +mark_as_advanced(HIPCC_EXECUTABLE) + +if(NOT HIPCC_EXECUTABLE) + message(AUTHOR_WARNING "hipcc could not be found. Cannot build roctx target") + return() +endif() + +if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE) + if( + CMAKE_CXX_COMPILER STREQUAL HIPCC_EXECUTABLE + OR "${CMAKE_CXX_COMPILER}" MATCHES "hipcc" + ) + set(CMAKE_CXX_COMPILER_IS_HIPCC 1 CACHE BOOL "HIP compiler") + endif() +endif() + +if( + ( + NOT CMAKE_CXX_COMPILER_IS_HIPCC + OR (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT hip_FOUND) + ) + AND (NOT COMMAND rocprofiler_systems_custom_compilation AND NOT HIPCC_EXECUTABLE) +) + message(AUTHOR_WARNING "roctx target could not be built") + return() +endif() + +add_executable(roctx roctx.cpp) +target_link_libraries(roctx PRIVATE Threads::Threads) + +if( + CMAKE_CXX_COMPILER_ID MATCHES "Clang" + AND NOT CMAKE_CXX_COMPILER_IS_HIPCC + AND NOT HIPCC_EXECUTABLE +) + target_link_libraries( + roctx + PRIVATE + $ + $ + $ + ) +else() + target_compile_options(roctx PRIVATE -W -Wall) +endif() + +# Find the library +find_library( + ROCTX_LIBRARY + NAMES rocprofiler-sdk-roctx + PATHS ${ROCM_PATH}/lib ${ROCM_PATH}/lib64 + REQUIRED +) +# Link it to target +target_link_libraries(roctx PRIVATE ${ROCTX_LIBRARY}) + +if("${CMAKE_BUILD_TYPE}" MATCHES "Release") + target_compile_options(roctx PRIVATE -g1) +endif() + +if(NOT CMAKE_CXX_COMPILER_IS_HIPCC AND HIPCC_EXECUTABLE) + # defined in MacroUtilities.cmake + rocprofiler_systems_custom_compilation(COMPILER ${HIPCC_EXECUTABLE} TARGET roctx) +endif() + +if(ROCPROFSYS_INSTALL_EXAMPLES) + install(TARGETS roctx DESTINATION bin COMPONENT rocprofiler-systems-examples) +endif() diff --git a/projects/rocprofiler-systems/examples/roctx/roctx.cpp b/projects/rocprofiler-systems/examples/roctx/roctx.cpp new file mode 100644 index 0000000000..1b60155e5a --- /dev/null +++ b/projects/rocprofiler-systems/examples/roctx/roctx.cpp @@ -0,0 +1,164 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// HIP and ROCm profiling headers +#include +#include +#include + +// Define HIP_API_CALL macro for error handling. +#define HIP_API_CALL(CALL) \ + { \ + hipError_t error_ = (CALL); \ + if(error_ != hipSuccess) \ + { \ + auto _hip_api_print_lk = auto_lock_t{ print_lock }; \ + fprintf(stderr, "%s:%d :: HIP error : %s\n", __FILE__, __LINE__, \ + hipGetErrorString(error_)); \ + throw std::runtime_error("hip_api_call"); \ + } \ + } + +namespace +{ +using auto_lock_t = std::unique_lock; +auto print_lock = std::mutex{}; +} // namespace + +// HIP Kernel Function +__global__ void +hipKernelLaunch(int* data) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + data[idx] += 1; +} + +// Function to execute GPU workload with ROCTx profiling +void +gpu_workload() +{ + // Start a profiling range and push a sub-range for launching the kernel. + uint64_t rangeId = roctxRangeStart("roctxRangeStart_GPU_Compute"); + roctxRangePush("roctxRangePush_HIP_Kernel"); + + const int N = 256; + int* d_data = nullptr; + + // Allocate device memory + HIP_API_CALL(hipMalloc(&d_data, N * sizeof(int))); + + // Launch the kernel + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipKernelLaunch), dim3(1), dim3(N), 0, 0, d_data); + + // Wait for GPU to finish + HIP_API_CALL(hipDeviceSynchronize()); + + // Free device memory + HIP_API_CALL(hipFree(d_data)); + + // Pop the sub-range and stop the profiling range + roctxRangePop(); + roctxRangeStop(rangeId); +} + +// Function executed in a separate thread with ROCTx annotations. +void +roctxThreadFunc() +{ + roctxNameOsThread("roctxNameOsThread_New"); + roctxMark("roctxMark_Thread_Start"); + gpu_workload(); + roctxMark("roctxMark_End"); +} + +void +run_profiling() +{ + // Label HIP device and stream + int deviceId{ 0 }; + HIP_API_CALL(hipGetDevice(&deviceId)); + roctxNameHipDevice("roctxNameHipDevice_device_id", deviceId); + + hipStream_t stream = {}; + HIP_API_CALL(hipStreamCreate(&stream)); + roctxNameHipStream("roctxNameHipStream_hip_stream", stream); + + // Insert a marker before the GPU workload + roctxMark("roctxMark_GPU_workload"); + + // Start a nested profiling range. + roctxRangePush("roctxRangePush_run_profiling"); + + // Execute GPU workload + gpu_workload(); + + // Pause profiling steps using ROCTx APIs. + roctx_thread_id_t roctx_tid{}; // Thread identifier structure + roctxGetThreadId(&roctx_tid); + + // Set names for OS thread, HSA agent, HIP device and stream. + roctxNameOsThread(std::to_string(roctx_tid).c_str()); + // Prepare an hsa_agent_t with roctx thread id as a handle (example usage): + hsa_agent_t hsa_agent = { .handle = roctx_tid }; + roctxNameHsaAgent("roctxNameHsaAgent_hsa_agent", &hsa_agent); + roctxNameHipDevice("roctxNameHipDevice_hipdevice", 0); + auto* hip_stream = hipStream_t{}; + roctxNameHipStream("roctxNameHipStream_hip_stream", hip_stream); + + // Pause ROCTx profiling for the current thread. + roctxProfilerPause(roctx_tid); + roctxMark("roctxMark_RoctxProfilerPause_End"); + + // Start a separate thread executing additional profiling-annotated work. + std::thread worker(roctxThreadFunc); + worker.join(); + + // Resume ROCTx profiling. + roctxProfilerResume(roctx_tid); + + // End the nested profiling range. + roctxRangePop(); + + // Insert a marker after execution of workload. + roctxMark("roctxMark_Finished_GPU"); + HIP_API_CALL(hipStreamDestroy(stream)); +} + +int +main() +{ + std::cout << "Roctx profiling started!" << std::endl; + run_profiling(); + std::cout << "Roctx profiling Completed!" << std::endl; + return 0; +} diff --git a/projects/rocprofiler-systems/tests/CMakeLists.txt b/projects/rocprofiler-systems/tests/CMakeLists.txt index 05e520c4cf..6e3241fb86 100644 --- a/projects/rocprofiler-systems/tests/CMakeLists.txt +++ b/projects/rocprofiler-systems/tests/CMakeLists.txt @@ -47,5 +47,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-causal-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-python-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-decode-tests.cmake) include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-nic-perf.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-roctx-tests.cmake) add_subdirectory(source) diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake new file mode 100644 index 0000000000..c8094a12cc --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake @@ -0,0 +1,100 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# -------------------------------------------------------------------------------------- # +# +# roctx tests +# +# -------------------------------------------------------------------------------------- # +# Ensure ROCPROFSYS_ROCM_DOMAINS is defined +set(_roctx_environment + "${_base_environment}" + "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,marker_api,kernel_dispatch" +) +rocprofiler_systems_add_test( + # SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE SKIP_RUNTIME + NAME roctx-api + TARGET roctx + GPU ON + ENVIRONMENT "${_roctx_environment}" +) +set(ROCTX_LABEL + roctxMark_GPU_workload + roctxRangePushA + roctxRangePushA + roctxRangeStartA + roctxRangeStartA + roctxRangePush_HIP_Kernel + roctxRangePush_HIP_Kernel + roctxRangeStart_GPU_Compute + roctxRangeStart_GPU_Compute + roctxGetThreadId + roctxMark_RoctxProfilerPause_End + roctxMark_Thread_Start + roctxMark_End + roctxRangePush_run_profiling + roctxMark_Finished_GPU +) + +set(ROCTX_COUNT + 1 + 2 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +) + +set(ROCTX_DEPTH + 1 + 1 + 0 + 1 + 0 + 1 + 0 + 1 + 0 + 1 + 1 + 0 + 0 + 1 + 1 +) + +rocprofiler_systems_add_validation_test( + NAME roctx-api-sampling + PERFETTO_METRIC "rocm_marker_api" + PERFETTO_FILE "perfetto-trace.proto" + LABELS "roctx" + ARGS -l ${ROCTX_LABEL} -c ${ROCTX_COUNT} -d ${ROCTX_DEPTH} -p +)