From 952d1dabe252450eda9e0a223aa178f4fa0fc695 Mon Sep 17 00:00:00 2001 From: Venkateshwar Reddy Kandula Date: Mon, 6 Oct 2025 13:09:39 -0500 Subject: [PATCH] [ROCProfiler-SDK][ROCR] HSA New API changes for HSA_AMD_EXT_API_TABLE_STEP_VERSION 8 (#1182) * add new hsa ext api for version 8. * use fmt instead of ostream. * override rccl from therock * Update rocprofiler-sdk-continuous_integration.yml * Update rocprofiler-sdk-continuous_integration.yml * Update rocprofiler-sdk-continuous_integration.yml * enable rocr-build * format * disable att consecutive-kernels tests. * Enable ROCR build in code coverage workflow --------- Co-authored-by: Venkateshwar Reddy Kandula --- .../rocprofiler-sdk-code_coverage.yml | 2 +- ...rocprofiler-sdk-continuous_integration.yml | 2 +- .../rocprofiler-sdk/cxx/enum_string.hpp | 6 +++++ .../rocprofiler-sdk/hsa/amd_ext_api_id.h | 4 ++++ .../include/rocprofiler-sdk/hsa/api_args.h | 20 +++++++++++++++++ .../source/lib/rocprofiler-sdk/hsa/abi.cpp | 6 +++++ .../lib/rocprofiler-sdk/hsa/details/fmt.hpp | 17 ++++++++++++++ .../lib/rocprofiler-sdk/hsa/hsa.def.cpp | 22 +++++++++++++++++++ .../source/lib/rocprofiler-sdk/hsa/utils.hpp | 1 + .../att-consecutive-kernels/CMakeLists.txt | 4 ++++ 10 files changed, 82 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rocprofiler-sdk-code_coverage.yml b/.github/workflows/rocprofiler-sdk-code_coverage.yml index 173832aa46..2fb0600ad6 100644 --- a/.github/workflows/rocprofiler-sdk-code_coverage.yml +++ b/.github/workflows/rocprofiler-sdk-code_coverage.yml @@ -55,7 +55,7 @@ env: mi3xx_EXCLUDE_LABEL_REGEX: "" navi4_EXCLUDE_LABEL_REGEX: "" GLOBAL_CMAKE_OPTIONS: "" - DISABLE_ROCR_BUILD: "true" + DISABLE_ROCR_BUILD: "false" jobs: code-coverage: diff --git a/.github/workflows/rocprofiler-sdk-continuous_integration.yml b/.github/workflows/rocprofiler-sdk-continuous_integration.yml index 4df949294a..0262bb4460 100644 --- a/.github/workflows/rocprofiler-sdk-continuous_integration.yml +++ 
b/.github/workflows/rocprofiler-sdk-continuous_integration.yml @@ -62,7 +62,7 @@ env: navi4_EXCLUDE_LABEL_REGEX: "" GLOBAL_CMAKE_OPTIONS: "" - DISABLE_ROCR_BUILD: "true" + DISABLE_ROCR_BUILD: "false" CI_MODE: ${{ github.event_name == 'schedule' && 'Nightly' || 'Continuous' }} diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp index 7801ea8726..b998fe029c 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -356,6 +356,10 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_memory_get_preferr # if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x07 ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_portable_export_dmabuf_v2); # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x08 +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_write); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_read); +# endif #endif #if HSA_AMD_EXT_API_TABLE_MAJOR_VERSION == 0x01 @@ -377,6 +381,8 @@ static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 72); static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 73); # elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x07 static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 74); +# elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x08 +static_assert(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST == 76); # else # if !defined(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) && \ (defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h index 18f9e87d60..17e5394cfd 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/amd_ext_api_id.h 
@@ -121,6 +121,10 @@ typedef enum rocprofiler_hsa_amd_ext_api_id_t // NOLINT(performance-enum-size) # if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x07 ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_portable_export_dmabuf_v2, # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x08 + ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_write, + ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_read, +# endif #endif ROCPROFILER_HSA_AMD_EXT_API_ID_LAST, diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h index 379eab6bd8..9736e7a426 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/api_args.h @@ -1444,6 +1444,26 @@ typedef union rocprofiler_hsa_api_args_t uint64_t flags; } hsa_amd_portable_export_dmabuf_v2; # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x08 + struct + { + hsa_amd_ais_file_handle_t handle; + void* devicePtr; + uint64_t size; + int64_t file_offset; + uint64_t* size_copied; + int32_t* status; + } hsa_amd_ais_file_write; + struct + { + hsa_amd_ais_file_handle_t handle; + void* devicePtr; + uint64_t size; + int64_t file_offset; + uint64_t* size_copied; + int32_t* status; + } hsa_amd_ais_file_read; +# endif #endif } rocprofiler_hsa_api_args_t; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp index 2a5db235c5..1ea52d6c6b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/abi.cpp @@ -57,6 +57,8 @@ ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 73); ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 74); #elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x07 ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 75); +#elif HSA_AMD_EXT_API_TABLE_STEP_VERSION == 0x08 
+ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 77); #else INTERNAL_CI_ROCP_SDK_ENFORCE_ABI_VERSIONING(::AmdExtTable, 0); #endif @@ -293,6 +295,10 @@ ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_memory_get_preferred_copy_engine_fn, #if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x07 ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_portable_export_dmabuf_v2_fn, 74); #endif +#if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x08 +ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_ais_file_write_fn, 75); +ROCP_SDK_ENFORCE_ABI(::AmdExtTable, hsa_amd_ais_file_read_fn, 76); +#endif ROCP_SDK_ENFORCE_ABI(::ImageExtTable, hsa_ext_image_get_capability_fn, 1); ROCP_SDK_ENFORCE_ABI(::ImageExtTable, hsa_ext_image_data_get_info_fn, 2); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp index de8adbd51b..c136a2739c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/details/fmt.hpp @@ -168,4 +168,21 @@ struct formatter } } }; +#if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x08 +template <> +struct formatter<hsa_amd_ais_file_handle_t> +{ + template <typename ParseContext> + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template <typename FormatContext> + auto format(hsa_amd_ais_file_handle_t const& h, FormatContext& ctx) const + { + return fmt::format_to(ctx.out(), "{{fd={}, handle={}}}", h.fd, h.handle); + } +}; +#endif } // namespace fmt diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp index aa839a5c28..bb8bc45f0d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.def.cpp @@ -496,6 +496,28 @@ HSA_API_INFO_DEFINITION_V(ROCPROFILER_HSA_TABLE_ID_AmdExt, offset, flags) # endif +# if HSA_AMD_EXT_API_TABLE_STEP_VERSION >= 0x08 
+HSA_API_INFO_DEFINITION_V(ROCPROFILER_HSA_TABLE_ID_AmdExt, + ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_write, + hsa_amd_ais_file_write, + hsa_amd_ais_file_write_fn, + handle, + devicePtr, + size, + file_offset, + size_copied, + status) +HSA_API_INFO_DEFINITION_V(ROCPROFILER_HSA_TABLE_ID_AmdExt, + ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_ais_file_read, + hsa_amd_ais_file_read, + hsa_amd_ais_file_read_fn, + handle, + devicePtr, + size, + file_offset, + size_copied, + status) +# endif # endif #elif defined(ROCPROFILER_LIB_ROCPROFILER_HSA_ASYNC_COPY_CPP_IMPL) && \ diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp index f4d76b89f7..004cb466b9 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/utils.hpp @@ -25,6 +25,7 @@ #include #include "lib/common/stringize_arg.hpp" +#include "lib/rocprofiler-sdk/hsa/details/fmt.hpp" #include #include diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/att-consecutive-kernels/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/att-consecutive-kernels/CMakeLists.txt index 20e968ba69..b5062c7b92 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/att-consecutive-kernels/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/att-consecutive-kernels/CMakeLists.txt @@ -61,6 +61,10 @@ if(attdecoder_FOUND) set(ATT_LIB --att-library-path ${attdecoder_LIB_DIR}) endif() +if(ROCPROFILER_DISABLE_UNSTABLE_CTESTS) + set(IS_DISABLED ON) +endif() + # consecutive kernel test add_test( NAME rocprofv3-test-att-consecutive-kernels-execute