From b509e9bd7728f4f0b0f8d647058080314e69d1d2 Mon Sep 17 00:00:00 2001 From: Benjamin Welton Date: Thu, 29 Jan 2026 09:26:33 -0800 Subject: [PATCH] [rocprofiler-sdk] Fix domain_ops_padding for 515+ HIP operations (#2941) * [rocprofiler-sdk] Fix domain_ops_padding for 515+ HIP operations The HIP runtime API now has 515+ operations (as of ROCm 7.x), but domain_ops_padding was set to 512. This caused std::out_of_range exceptions when checking operations >= 512 via std::bitset::test(). Changes: - Increase domain_ops_padding from 512 to 1024 - Add compile-time static_assert to validate padding is sufficient for all API domains (HIP, HSA, marker, RCCL, rocDecode, rocJPEG) Co-Authored-By: Claude (claude-opus-4.5) * Update projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/domain.cpp Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * [rocprofiler-sdk] Apply clang-format-11 to domain.cpp Co-Authored-By: Claude (claude-opus-4.5) * Rework implementation to ensure coverage of all operation enums * Fix compiler error in unit test for enum_string.cpp * Fix data types of domain_ops_padding values * Revert some changes in domain.cpp --------- Co-authored-by: Claude (claude-opus-4.5) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Jonathan R. 
Madsen --- .../rocprofiler-sdk/cxx/enum_string.hpp | 207 +++++++++++++----- .../lib/rocprofiler-sdk/context/domain.hpp | 6 +- .../source/lib/rocprofiler-sdk/hip/hip.cpp | 6 +- .../source/lib/rocprofiler-sdk/hsa/hsa.cpp | 6 +- .../lib/rocprofiler-sdk/marker/marker.cpp | 6 +- .../rocprofiler-sdk/marker/range_marker.cpp | 6 +- .../source/lib/rocprofiler-sdk/rccl/rccl.cpp | 6 +- .../rocprofiler-sdk/rocdecode/rocdecode.cpp | 6 +- .../lib/rocprofiler-sdk/rocjpeg/rocjpeg.cpp | 6 +- .../lib/rocprofiler-sdk/tests/enum_string.cpp | 6 +- 10 files changed, 195 insertions(+), 66 deletions(-) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp index 690585f0ba..4be88eecd1 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,7 @@ static constexpr size_t value = VALUE; \ }; -#define ROCPROFILER_ENUM_INFO(ENUM_T, BEG_VALUE, END_VALUE, IS_BITSET) \ +#define ROCPROFILER_ENUM_INFO(ENUM_T, BEG_VALUE, END_VALUE, IS_BITSET, IS_OPERATION) \ template <> \ struct rocprofiler_enum_info \ { \ @@ -63,6 +64,9 @@ else \ return Idx; \ } \ + static_assert(!IS_OPERATION || \ + (end <= ::rocprofiler::sdk::details::context_domain_ops_padding), \ + "operation enum range exceeds domain last value"); \ }; namespace rocprofiler @@ -71,6 +75,13 @@ namespace sdk { namespace details { +// Number of bits to reserve all op codes. This is set here so we can apply static_asserts to ensure +// that any operation enumeration does not exceed this value. This value is used in +// "lib/rocprofiler-sdk/context/domain.hpp" to create a bitset for the enabled operations. 
+constexpr size_t context_domain_ops_padding = 1024; +constexpr auto is_operation = true; +constexpr auto not_operation = false; + template struct rocprofiler_enum_label { @@ -119,19 +130,44 @@ get_enum_label(EnumT val, std::index_sequence) } // Table ID -ROCPROFILER_ENUM_INFO(rocprofiler_hsa_table_id_t, 0, ROCPROFILER_HSA_TABLE_ID_LAST, false) -ROCPROFILER_ENUM_INFO(rocprofiler_hip_table_id_t, 0, ROCPROFILER_HIP_TABLE_ID_LAST, false) -ROCPROFILER_ENUM_INFO(rocprofiler_marker_table_id_t, 0, ROCPROFILER_MARKER_TABLE_ID_LAST, false) -ROCPROFILER_ENUM_INFO(rocprofiler_rccl_table_id_t, 0, ROCPROFILER_RCCL_TABLE_ID_LAST, false) +ROCPROFILER_ENUM_INFO(rocprofiler_hsa_table_id_t, + 0, + ROCPROFILER_HSA_TABLE_ID_LAST, + false, + not_operation) +ROCPROFILER_ENUM_INFO(rocprofiler_hip_table_id_t, + 0, + ROCPROFILER_HIP_TABLE_ID_LAST, + false, + not_operation) +ROCPROFILER_ENUM_INFO(rocprofiler_marker_table_id_t, + 0, + ROCPROFILER_MARKER_TABLE_ID_LAST, + false, + not_operation) +ROCPROFILER_ENUM_INFO(rocprofiler_rccl_table_id_t, + 0, + ROCPROFILER_RCCL_TABLE_ID_LAST, + false, + not_operation) ROCPROFILER_ENUM_INFO(rocprofiler_rocdecode_table_id_t, 0, ROCPROFILER_ROCDECODE_TABLE_ID_LAST, - false) -ROCPROFILER_ENUM_INFO(rocprofiler_rocjpeg_table_id_t, 0, ROCPROFILER_ROCJPEG_TABLE_ID_LAST, false) + false, + not_operation) +ROCPROFILER_ENUM_INFO(rocprofiler_rocjpeg_table_id_t, + 0, + ROCPROFILER_ROCJPEG_TABLE_ID_LAST, + false, + not_operation) // table enums // rocprofiler_hsa_core_api_id_t -ROCPROFILER_ENUM_INFO(rocprofiler_hsa_core_api_id_t, 0, ROCPROFILER_HSA_CORE_API_ID_LAST, false) +ROCPROFILER_ENUM_INFO(rocprofiler_hsa_core_api_id_t, + 0, + ROCPROFILER_HSA_CORE_API_ID_LAST, + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_CORE_API_ID_hsa_init); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_CORE_API_ID_hsa_shut_down); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_CORE_API_ID_hsa_system_get_info); @@ -263,7 +299,8 @@ static_assert(ROCPROFILER_HSA_CORE_API_ID_LAST == 125); 
ROCPROFILER_ENUM_INFO(rocprofiler_hsa_amd_ext_api_id_t, 0, ROCPROFILER_HSA_AMD_EXT_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_coherency_get_type); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_coherency_set_type); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_AMD_EXT_API_ID_hsa_amd_profiling_set_profiler_enabled); @@ -406,7 +443,8 @@ static_assert(false, "Support for HSA_AMD_EXT_API_TABLE_MAJOR_VERSION is require ROCPROFILER_ENUM_INFO(rocprofiler_hsa_image_ext_api_id_t, 0, ROCPROFILER_HSA_IMAGE_EXT_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_IMAGE_EXT_API_ID_hsa_ext_image_get_capability); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_IMAGE_EXT_API_ID_hsa_ext_image_data_get_info); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_IMAGE_EXT_API_ID_hsa_ext_image_create); @@ -426,7 +464,8 @@ static_assert(ROCPROFILER_HSA_IMAGE_EXT_API_ID_LAST == 13); ROCPROFILER_ENUM_INFO(rocprofiler_hsa_finalize_ext_api_id_t, 0, ROCPROFILER_HSA_FINALIZE_EXT_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_FINALIZE_EXT_API_ID_hsa_ext_program_create); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_FINALIZE_EXT_API_ID_hsa_ext_program_destroy); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HSA_FINALIZE_EXT_API_ID_hsa_ext_program_add_module); @@ -439,7 +478,8 @@ static_assert(ROCPROFILER_HSA_FINALIZE_EXT_API_ID_LAST == 6); ROCPROFILER_ENUM_INFO(rocprofiler_hip_compiler_api_id_t, 0, ROCPROFILER_HIP_COMPILER_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_COMPILER_API_ID___hipPopCallConfiguration); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_COMPILER_API_ID___hipPushCallConfiguration); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_COMPILER_API_ID___hipRegisterFatBinary); @@ -455,7 +495,8 @@ static_assert(ROCPROFILER_HIP_COMPILER_API_ID_LAST == 9); ROCPROFILER_ENUM_INFO(rocprofiler_hip_runtime_api_id_t, 0, ROCPROFILER_HIP_RUNTIME_API_ID_LAST, - false) 
+ false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipApiName); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipArray3DCreate); ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipArray3DGetDescriptor); @@ -1076,7 +1117,8 @@ static_assert(false, "Support for new HIP_RUNTIME_API_TABLE_STEP_VERSION enumera ROCPROFILER_ENUM_INFO(rocprofiler_marker_core_api_id_t, 0, ROCPROFILER_MARKER_CORE_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop); @@ -1089,7 +1131,8 @@ static_assert(ROCPROFILER_MARKER_CORE_API_ID_LAST == 6); ROCPROFILER_ENUM_INFO(rocprofiler_marker_control_api_id_t, 0, ROCPROFILER_MARKER_CONTROL_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume); static_assert(ROCPROFILER_MARKER_CONTROL_API_ID_LAST == 2); @@ -1098,7 +1141,8 @@ static_assert(ROCPROFILER_MARKER_CONTROL_API_ID_LAST == 2); ROCPROFILER_ENUM_INFO(rocprofiler_marker_name_api_id_t, 0, ROCPROFILER_MARKER_NAME_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameOsThread); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameHsaAgent); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipDevice); @@ -1108,7 +1152,8 @@ static_assert(ROCPROFILER_MARKER_NAME_API_ID_LAST == 4); ROCPROFILER_ENUM_INFO(rocprofiler_marker_core_range_api_id_t, 0, ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST, - false) + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA); 
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA); @@ -1116,7 +1161,11 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId); static_assert(ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST == 4); // rocprofiler_ompt_operation_t -ROCPROFILER_ENUM_INFO(rocprofiler_ompt_operation_t, 0, ROCPROFILER_OMPT_ID_LAST, false); +ROCPROFILER_ENUM_INFO(rocprofiler_ompt_operation_t, + 0, + ROCPROFILER_OMPT_ID_LAST, + false, + is_operation); ROCPROFILER_ENUM_LABEL(ROCPROFILER_OMPT_ID_thread_begin); ROCPROFILER_ENUM_LABEL(ROCPROFILER_OMPT_ID_thread_end); ROCPROFILER_ENUM_LABEL(ROCPROFILER_OMPT_ID_parallel_begin); @@ -1151,7 +1200,11 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_OMPT_ID_callback_functions); static_assert(ROCPROFILER_OMPT_ID_LAST == 31); // rocprofiler_rccl_api_id_t -ROCPROFILER_ENUM_INFO(rocprofiler_rccl_api_id_t, 0, ROCPROFILER_RCCL_API_ID_LAST, false) +ROCPROFILER_ENUM_INFO(rocprofiler_rccl_api_id_t, + 0, + ROCPROFILER_RCCL_API_ID_LAST, + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_RCCL_API_ID_ncclAllGather); ROCPROFILER_ENUM_LABEL(ROCPROFILER_RCCL_API_ID_ncclAllReduce); ROCPROFILER_ENUM_LABEL(ROCPROFILER_RCCL_API_ID_ncclAllToAll); @@ -1212,7 +1265,11 @@ static_assert(false, "Support for new RCCL_API_TRACE_VERSION_PATCH enumerations #endif // rocprofiler_rocdecode_api_id_t -ROCPROFILER_ENUM_INFO(rocprofiler_rocdecode_api_id_t, 0, ROCPROFILER_ROCDECODE_API_ID_LAST, false) +ROCPROFILER_ENUM_INFO(rocprofiler_rocdecode_api_id_t, + 0, + ROCPROFILER_ROCDECODE_API_ID_LAST, + false, + is_operation) ROCPROFILER_ENUM_LABEL(ROCPROFILER_ROCDECODE_API_ID_rocDecCreateVideoParser); ROCPROFILER_ENUM_LABEL(ROCPROFILER_ROCDECODE_API_ID_rocDecParseVideoData); ROCPROFILER_ENUM_LABEL(ROCPROFILER_ROCDECODE_API_ID_rocDecDestroyVideoParser); @@ -1245,7 +1302,11 @@ static_assert(false, #endif // rocprofiler_rocjpeg_api_id_t -ROCPROFILER_ENUM_INFO(rocprofiler_rocjpeg_api_id_t, 0, ROCPROFILER_ROCJPEG_API_ID_LAST, false); 
+ROCPROFILER_ENUM_INFO(rocprofiler_rocjpeg_api_id_t, + 0, + ROCPROFILER_ROCJPEG_API_ID_LAST, + false, + is_operation); ROCPROFILER_ENUM_LABEL(ROCPROFILER_ROCJPEG_API_ID_rocJpegStreamCreate); ROCPROFILER_ENUM_LABEL(ROCPROFILER_ROCJPEG_API_ID_rocJpegStreamParse); ROCPROFILER_ENUM_LABEL(ROCPROFILER_ROCJPEG_API_ID_rocJpegStreamDestroy); @@ -1261,147 +1322,179 @@ static_assert(ROCPROFILER_ROCJPEG_API_ID_LAST == 9); ROCPROFILER_ENUM_INFO(rocprofiler_status_t, ROCPROFILER_STATUS_SUCCESS, ROCPROFILER_STATUS_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_buffer_category_t, ROCPROFILER_BUFFER_CATEGORY_NONE, ROCPROFILER_BUFFER_CATEGORY_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_agent_type_t, ROCPROFILER_AGENT_TYPE_NONE, ROCPROFILER_AGENT_TYPE_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_callback_phase_t, ROCPROFILER_CALLBACK_PHASE_NONE, ROCPROFILER_CALLBACK_PHASE_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_callback_tracing_kind_t, ROCPROFILER_CALLBACK_TRACING_NONE, ROCPROFILER_CALLBACK_TRACING_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_buffer_tracing_kind_t, ROCPROFILER_BUFFER_TRACING_NONE, ROCPROFILER_BUFFER_TRACING_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_code_object_operation_t, ROCPROFILER_CODE_OBJECT_NONE, ROCPROFILER_CODE_OBJECT_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_memory_copy_operation_t, ROCPROFILER_MEMORY_COPY_NONE, ROCPROFILER_MEMORY_COPY_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_memory_allocation_operation_t, ROCPROFILER_MEMORY_ALLOCATION_NONE, ROCPROFILER_MEMORY_ALLOCATION_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kernel_dispatch_operation_t, ROCPROFILER_KERNEL_DISPATCH_NONE, ROCPROFILER_KERNEL_DISPATCH_LAST, - false); + false, + not_operation); 
ROCPROFILER_ENUM_INFO(rocprofiler_pc_sampling_method_t, ROCPROFILER_PC_SAMPLING_METHOD_NONE, ROCPROFILER_PC_SAMPLING_METHOD_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_pc_sampling_unit_t, ROCPROFILER_PC_SAMPLING_UNIT_NONE, ROCPROFILER_PC_SAMPLING_UNIT_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_buffer_policy_t, ROCPROFILER_BUFFER_POLICY_NONE, ROCPROFILER_BUFFER_POLICY_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_scratch_memory_operation_t, ROCPROFILER_SCRATCH_MEMORY_NONE, ROCPROFILER_SCRATCH_MEMORY_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_scratch_alloc_flag_t, ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE, HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT + 1, - true); + true, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_runtime_initialization_operation_t, ROCPROFILER_RUNTIME_INITIALIZATION_NONE, ROCPROFILER_RUNTIME_INITIALIZATION_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_counter_info_version_id_t, ROCPROFILER_COUNTER_INFO_VERSION_NONE, ROCPROFILER_COUNTER_INFO_VERSION_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_counter_record_kind_t, ROCPROFILER_COUNTER_RECORD_NONE, ROCPROFILER_COUNTER_RECORD_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_counter_flag_t, ROCPROFILER_COUNTER_FLAG_NONE, ROCPROFILER_COUNTER_FLAG_LAST, - false); + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_code_object_storage_type_t, ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_NONE, ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_LAST, - false); - + false, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_runtime_library_t, ROCPROFILER_LIBRARY, details::compute_bitset_sequence_range(), - true); + true, + not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_intercept_table_t, ROCPROFILER_HSA_TABLE, details::compute_bitset_sequence_range(), - true); - + true, + not_operation); // 
callback_tracing.h ROCPROFILER_ENUM_INFO(rocprofiler_pc_sampling_record_kind_t, ROCPROFILER_PC_SAMPLING_RECORD_NONE, ROCPROFILER_PC_SAMPLING_RECORD_LAST, - false); + false, + not_operation); // kfd/kfd_id.h ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_page_migrate_operation_t, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_NONE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_page_fault_operation_t, ROCPROFILER_KFD_EVENT_PAGE_FAULT_NONE, ROCPROFILER_KFD_EVENT_PAGE_FAULT_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_queue_operation_t, ROCPROFILER_KFD_EVENT_QUEUE_NONE, ROCPROFILER_KFD_EVENT_QUEUE_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_unmap_from_gpu_operation_t, ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_NONE, ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_dropped_events_operation_t, ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_NONE, ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_page_migrate_operation_t, ROCPROFILER_KFD_PAGE_MIGRATE_NONE, ROCPROFILER_KFD_PAGE_MIGRATE_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_page_fault_operation_t, ROCPROFILER_KFD_PAGE_FAULT_NONE, ROCPROFILER_KFD_PAGE_FAULT_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_kfd_queue_operation_t, ROCPROFILER_KFD_QUEUE_NONE, ROCPROFILER_KFD_QUEUE_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_external_correlation_id_request_kind_t, ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE, ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST, - false); + false, + is_operation); ROCPROFILER_ENUM_INFO(rocprofiler_thread_trace_parameter_type_t, ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, ROCPROFILER_THREAD_TRACE_PARAMETER_LAST, - false); + false, + 
not_operation); ROCPROFILER_ENUM_INFO(rocprofiler_agent_version_t, ROCPROFILER_AGENT_INFO_VERSION_NONE, ROCPROFILER_AGENT_INFO_VERSION_LAST, - false); + false, + not_operation); // begin fwd.h // rocprofiler_hsa_table_id_t diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/domain.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/domain.hpp index c100a9dc37..f62921fb4f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/domain.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/domain.hpp @@ -25,6 +25,7 @@ #include "lib/common/mpl.hpp" #include +#include #include #include @@ -35,7 +36,10 @@ namespace rocprofiler namespace context { // number of bits to reserve all op codes -constexpr size_t domain_ops_padding = 512; +// NOTE: HIP runtime API has 515+ operations as of ROCm 7.x, must be larger than max op count. +// NOTE: This is set in "rocprofiler-sdk/cxx/enum_string.hpp" so we can apply static_asserts to +// ensure that any operation enumeration does not exceed this value. 
+constexpr auto domain_ops_padding = ::rocprofiler::sdk::details::context_domain_ops_padding; template struct domain_info; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp index b0b4f027c7..d8653403e1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/hip.cpp @@ -27,6 +27,7 @@ #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/hip/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/fwd.hpp" @@ -532,6 +533,9 @@ update_table(Tp* _orig, std::integral_constant) { using table_type = typename hip_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = hip_api_info{}; @@ -553,7 +557,7 @@ update_table(Tp* _orig, std::integral_constant) // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp index 05bc94b46f..14dd9dafa6 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp @@ -26,6 +26,7 @@ #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/hsa/details/ostream.hpp" #include "lib/rocprofiler-sdk/hsa/pc_sampling.hpp" #include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp" @@ -632,6 +633,9 @@ update_table(const context::context_array_t& _contexts, { using table_type = typename hsa_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = hsa_api_info{}; @@ -654,7 +658,7 @@ update_table(const context::context_array_t& _contexts, // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/marker.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/marker.cpp index 8be6dcb5cf..5c83d40f82 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/marker.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/marker.cpp @@ -27,6 +27,7 @@ #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/context/correlation_id.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/marker/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" @@ -411,6 +412,9 @@ update_table(Tp* _orig, std::integral_constant) { using table_type = typename roctx_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = roctx_api_info{}; @@ -431,7 +435,7 @@ update_table(Tp* _orig, std::integral_constant) // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/range_marker.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/range_marker.cpp index 9c179ad3c9..8ed48dcf3f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/range_marker.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/marker/range_marker.cpp @@ -26,6 +26,7 @@ #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/context/correlation_id.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/marker/marker.hpp" #include "lib/rocprofiler-sdk/marker/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" @@ -732,6 +733,9 @@ update_table(Tp* _orig, std::integral_constant) { using table_type = typename roctx_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = roctx_api_info{}; @@ -779,7 +783,7 @@ update_table(Tp* _orig, std::integral_constant) // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp index 322ccfe63e..f5329ad532 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rccl/rccl.cpp @@ -26,6 +26,7 @@ #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/hip/hip.hpp" #include "lib/rocprofiler-sdk/hip/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" @@ -429,6 +430,9 @@ update_table(Tp* _orig, std::integral_constant) { using table_type = typename rccl_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = rccl_api_info{}; @@ -448,7 +452,7 @@ update_table(Tp* _orig, std::integral_constant) // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp index 10fd9291b3..25282cd31d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocdecode/rocdecode.cpp @@ -27,6 +27,7 @@ #include "lib/common/string_entry.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" @@ -504,6 +505,9 @@ update_table(Tp* _orig, std::integral_constant) { using table_type = typename rocdecode_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = rocdecode_api_info{}; @@ -525,7 +529,7 @@ update_table(Tp* _orig, std::integral_constant) // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocjpeg/rocjpeg.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocjpeg/rocjpeg.cpp index d21aec4976..09b305469e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocjpeg/rocjpeg.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/rocjpeg/rocjpeg.cpp @@ -26,6 +26,7 @@ #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/domain.hpp" #include "lib/rocprofiler-sdk/hip/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" @@ -428,6 +429,9 @@ update_table(Tp* _orig, std::integral_constant) { using table_type = typename rocjpeg_table_lookup::type; + static_assert(OpIdx < context::domain_ops_padding, + "operation index exceeds context domain ops padding"); + if constexpr(std::is_same::value) { auto _info = rocjpeg_api_info{}; @@ -448,7 +452,7 @@ update_table(Tp* _orig, std::integral_constant) // 3. 
update function pointer with wrapper auto& _table = _info.get_table(_orig); auto& _func = _info.get_table_func(_table); - _func = _info.get_functor(_func); + if(_func) _func = _info.get_functor(_func); } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/enum_string.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/enum_string.cpp index 59b361c5d5..5f8396d4f8 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/enum_string.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/enum_string.cpp @@ -368,7 +368,11 @@ namespace sdk namespace details { using namespace enum_string_test; -ROCPROFILER_ENUM_INFO(test_unsupported_enum_val, TEST_ENUM_VALUE_NONE, TEST_ENUM_VALUE_LAST, false); +ROCPROFILER_ENUM_INFO(test_unsupported_enum_val, + TEST_ENUM_VALUE_NONE, + TEST_ENUM_VALUE_LAST, + false, + not_operation); ROCPROFILER_ENUM_LABEL(TEST_ENUM_VALUE_V1); ROCPROFILER_ENUM_LABEL(TEST_ENUM_VALUE_V3); } // namespace details