SWDEV-506317: Kernel trace failing due to Code object errors (#170)

SWDEV-506317: Kernel trace failing
This commit is contained in:
Jakaraddi, Manjunath
2025-02-04 16:01:42 -08:00
committed by GitHub
parent dd5c0ea257
commit 9c89b475b0
@@ -66,21 +66,21 @@ constexpr auto kernel_symbol_metadata_lookup = ".symbol";
} \
}
#define CHECK_FATAL_COMGR(call) \
#define CHECK_WARNING_COMGR(call) \
if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \
{ \
const char* reason = ""; \
amd_comgr_status_string(status, &reason); \
ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason; \
ROCP_WARNING << #call << " failed with error code " << status << " :: " << reason; \
}
#define CHECK_FATAL_COMGR_EXT(call, ...) \
#define CHECK_WARNING_COMGR_EXT(call, ...) \
if(amd_comgr_status_s status = (call); status != AMD_COMGR_STATUS_SUCCESS) \
{ \
const char* reason = ""; \
amd_comgr_status_string(status, &reason); \
ROCP_FATAL << #call << " failed with error code " << status << " :: " << reason \
<< " :: " << __VA_ARGS__; \
ROCP_WARNING << #call << " failed with error code " << status << " :: " << reason \
<< " :: " << __VA_ARGS__; \
}
#define CHECK_RETURN_COMGR(call) \
@@ -147,12 +147,12 @@ get_isa_offsets(hsa_agent_t hsa_agent, const void* fat_bin)
query_list.emplace_back(amd_comgr_code_object_info_t{isa->c_str(), 0, 0});
auto data_object = amd_comgr_data_t{0};
CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object));
CHECK_FATAL_COMGR(
CHECK_WARNING_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object));
CHECK_WARNING_COMGR(
amd_comgr_set_data(data_object, 4096, reinterpret_cast<const char*>(fat_bin)));
CHECK_FATAL_COMGR(
CHECK_WARNING_COMGR(
amd_comgr_lookup_code_object(data_object, query_list.data(), query_list.size()));
CHECK_FATAL_COMGR(amd_comgr_release_data(data_object));
CHECK_WARNING_COMGR(amd_comgr_release_data(data_object));
return query_list;
}
@@ -190,7 +190,7 @@ get_kernels_meta_node(const amd_comgr_code_object_info_t& isa_offset,
amd_comgr_metadata_node_t* kernels_metadata)
{
auto binary_data = amd_comgr_data_t{0};
CHECK_FATAL_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binary_data));
CHECK_WARNING_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binary_data));
void* bin_offset = static_cast<char*>(const_cast<void*>(fat_bin)) + isa_offset.offset;
CHECK_RETURN_COMGR_EXT(
@@ -199,8 +199,8 @@ get_kernels_meta_node(const amd_comgr_code_object_info_t& isa_offset,
<< isa_offset.size << ", " << isa_offset.offset << "), fat_bin=" << fat_bin);
auto binary_metadata = amd_comgr_metadata_node_t{};
CHECK_FATAL_COMGR(amd_comgr_get_data_metadata(binary_data, &binary_metadata));
CHECK_FATAL_COMGR(
CHECK_WARNING_COMGR(amd_comgr_get_data_metadata(binary_data, &binary_metadata));
CHECK_WARNING_COMGR(
amd_comgr_metadata_lookup(binary_metadata, kernels_metadata_lookup, kernels_metadata));
return AMD_COMGR_STATUS_SUCCESS;
@@ -218,14 +218,14 @@ get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset
AMD_COMGR_STATUS_SUCCESS)
return kernel_sym_device_func_map;
CHECK_FATAL_COMGR(amd_comgr_get_metadata_list_size(kernels_metadata, &num_kernels));
CHECK_WARNING_COMGR(amd_comgr_get_metadata_list_size(kernels_metadata, &num_kernels));
for(size_t i = 0; i < num_kernels; i++)
{
auto kernel_node = amd_comgr_metadata_node_t{};
auto kernel_name_meta = amd_comgr_metadata_node_t{};
CHECK_FATAL_COMGR(amd_comgr_index_list_metadata(kernels_metadata, i, &kernel_node));
CHECK_FATAL_COMGR(
CHECK_WARNING_COMGR(amd_comgr_index_list_metadata(kernels_metadata, i, &kernel_node));
CHECK_WARNING_COMGR(
amd_comgr_metadata_lookup(kernel_node, kernel_name_metadata_lookup, &kernel_name_meta));
auto kernel_meta_name = std::string{};
@@ -236,7 +236,7 @@ get_kernel_symbol_device_name_map(const amd_comgr_code_object_info_t& isa_offset
ROCP_INFO << "found kernel meta name: " << kernel_meta_name;
auto kernel_symbol = std::string{};
CHECK_FATAL_COMGR(amd_comgr_iterate_map_metadata(
CHECK_WARNING_COMGR(amd_comgr_iterate_map_metadata(
kernel_node, get_device_name_kernel_symbols_mapping, &kernel_symbol));
if(!kernel_symbol.empty())
{