From 87fe3387c54fdc8f63e69160889c2fe07f61bb67 Mon Sep 17 00:00:00 2001 From: "Jakaraddi, Manjunath" Date: Tue, 4 Feb 2025 16:01:42 -0800 Subject: [PATCH] SWDEV-506317: Kernel trace failing due to Code object errors (#170) SWDEV-506317: Kernel trace failing [ROCm/rocprofiler-sdk commit: 9c89b475b0d69fb98a0ee14a74c7199bd87ed9f3] --- .../code_object/hip/code_object.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp index fd4b33b486..b71accca1c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/code_object/hip/code_object.cpp @@ -66,21 +66,21 @@ constexpr auto kernel_symbol_metadata_lookup = ".symbol"; } \ } -#define CHECK_FATAL_COMGR(call) \ +#define CHECK_WARNING_COMGR(call) \ if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \ { \ const char* reason = ""; \ amd_comgr_status_string(status, &reason); \ - ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason; \ + ROCP_WARNING << #call << " failed with error code " << status << " :: " << reason; \ } -#define CHECK_FATAL_COMGR_EXT(call, ...) \ +#define CHECK_WARNING_COMGR_EXT(call, ...) \ if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \ { \ const char* reason = ""; \ amd_comgr_status_string(status, &reason); \ - ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason \ - << " :: " << __VA_ARGS__; \ + ROCP_WARNING << #call << " failed with error code " << status << " :: " << reason \ + << " :: " << __VA_ARGS__; \ } #define CHECK_RETURN_COMGR(call) \ @@ -147,12 +147,12 @@ get_isa_offsets(hsa_agent_t hsa_agent, const void* fat_bin) query_list.emplace_back(amd_comgr_code_object_info_t{isa->c_str(), 0, 0}); auto data_object = amd_comgr_data_t{0}; - CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)); - CHECK_FATAL_COMGR( + CHECK_WARNING_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)); + CHECK_WARNING_COMGR( amd_comgr_set_data(data_object, 4096, reinterpret_cast(fat_bin))); - CHECK_FATAL_COMGR( + CHECK_WARNING_COMGR( amd_comgr_lookup_code_object(data_object, query_list.data(), query_list.size())); - CHECK_FATAL_COMGR(amd_comgr_release_data(data_object)); + CHECK_WARNING_COMGR(amd_comgr_release_data(data_object)); return query_list; } @@ -190,7 +190,7 @@ get_kernels_meta_node(const amd_comgr_code_object_info_t& isa_offset, amd_comgr_metadata_node_t* kernels_metadata) { auto binary_data = amd_comgr_data_t{0}; - CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binary_data)); + CHECK_WARNING_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binary_data)); void* bin_offset = static_cast(const_cast(fat_bin)) + isa_offset.offset; CHECK_RETURN_COMGR_EXT( @@ -199,8 +199,8 @@ get_kernels_meta_node(const amd_comgr_code_object_info_t& isa_offset, << isa_offset.size << ", " << isa_offset.offset << "), fat_bin=" << fat_bin); auto binary_metadata = amd_comgr_metadata_node_t{}; - CHECK_FATAL_COMGR(amd_comgr_get_data_metadata(binary_data, &binary_metadata)); - CHECK_FATAL_COMGR( + CHECK_WARNING_COMGR(amd_comgr_get_data_metadata(binary_data, &binary_metadata)); + CHECK_WARNING_COMGR( amd_comgr_metadata_lookup(binary_metadata, kernels_metadata_lookup, kernels_metadata)); return AMD_COMGR_STATUS_SUCCESS; @@ -218,14 +218,14 @@ get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset AMD_COMGR_STATUS_SUCCESS) return kernel_sym_device_func_map; - CHECK_FATAL_COMGR(amd_comgr_get_metadata_list_size(kernels_metadata, &num_kernels)); + CHECK_WARNING_COMGR(amd_comgr_get_metadata_list_size(kernels_metadata, &num_kernels)); for(size_t i = 0; i < num_kernels; i++) { auto kernel_node = amd_comgr_metadata_node_t{}; auto kernel_name_meta = amd_comgr_metadata_node_t{}; - CHECK_FATAL_COMGR(amd_comgr_index_list_metadata(kernels_metadata, i, &kernel_node)); - CHECK_FATAL_COMGR( + CHECK_WARNING_COMGR(amd_comgr_index_list_metadata(kernels_metadata, i, &kernel_node)); + CHECK_WARNING_COMGR( amd_comgr_metadata_lookup(kernel_node, kernel_name_metadata_lookup, &kernel_name_meta)); auto kernel_meta_name = std::string{}; @@ -236,7 +236,7 @@ get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset ROCP_INFO << "found kernel meta name: " << kernel_meta_name; auto kernel_symbol = std::string{}; - CHECK_FATAL_COMGR(amd_comgr_iterate_map_metadata( + CHECK_WARNING_COMGR(amd_comgr_iterate_map_metadata( kernel_node, get_device_name_kernel_symbols_mapping, &kernel_symbol)); if(!kernel_symbol.empty()) {