Revert "Use COMgr to read Kernel Args Metadata (#1006)"
This reverts commit882006555b. [ROCm/hip commit:a3d118eaa8]
This commit is contained in:
committad av
Siu Chi Chan
förälder
c797e6e27b
incheckning
62e96cb4cf
@@ -193,18 +193,6 @@ if(USE_PROF_API EQUAL 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
################
|
||||
# Detect Code Object Manager
|
||||
################
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
find_package(amd_comgr REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm/lib/cmake/amd_comgr
|
||||
)
|
||||
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
|
||||
endif()
|
||||
|
||||
#############################
|
||||
# Build steps
|
||||
#############################
|
||||
@@ -276,9 +264,6 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
target_link_libraries(hip_hcc_static PRIVATE hc_am)
|
||||
endif()
|
||||
|
||||
target_link_libraries(hip_hcc PRIVATE amd_comgr)
|
||||
target_link_libraries(hip_hcc_static PRIVATE amd_comgr)
|
||||
|
||||
string(REPLACE " " ";" HCC_CXX_FLAGS_LIST ${HCC_CXX_FLAGS})
|
||||
foreach(TARGET hip_hcc hip_hcc_static)
|
||||
target_include_directories(${TARGET} SYSTEM INTERFACE $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>;${HSA_PATH}/include)
|
||||
|
||||
@@ -265,11 +265,9 @@ if ($HIP_PLATFORM eq "clang") {
|
||||
|
||||
$HIPCXXFLAGS .= " -isystem $HIP_PATH/include/hip/hcc_detail/cuda";
|
||||
$HIPCXXFLAGS .= " -isystem $HSA_PATH/include";
|
||||
$HIPCXXFLAGS .= " -isystem /opt/rocm/include";
|
||||
$HIPCXXFLAGS .= " -Wno-deprecated-register";
|
||||
|
||||
$HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am ";
|
||||
$HIPLDFLAGS .= " -L/opt/rocm/lib -Wl,--rpath=/opt/rocm/lib -lamd_comgr ";
|
||||
# $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport ";
|
||||
|
||||
# Add trace marker library:
|
||||
|
||||
@@ -39,7 +39,6 @@ THE SOFTWARE.
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
#include <amd_comgr.h>
|
||||
|
||||
#include <link.h>
|
||||
|
||||
@@ -56,8 +55,6 @@ THE SOFTWARE.
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
struct ihipModuleSymbol_t;
|
||||
using hipFunction_t = ihipModuleSymbol_t*;
|
||||
@@ -561,185 +558,92 @@ const std::unordered_map<
|
||||
}
|
||||
|
||||
inline
|
||||
void checkError(
|
||||
amd_comgr_status_t status,
|
||||
char const *str) {
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
const char *status_str;
|
||||
status = amd_comgr_status_string(status, &status_str);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS)
|
||||
std::cerr << "FAILED: " << str << "\n REASON: " << status_str << std::endl;
|
||||
hip_throw(std::runtime_error{"Metadata parsing failed."});
|
||||
}
|
||||
std::size_t parse_args(
|
||||
const std::string& metadata,
|
||||
std::size_t f,
|
||||
std::size_t l,
|
||||
std::vector<std::pair<std::size_t, std::size_t>>& size_align) {
|
||||
if (f == l) return f;
|
||||
if (!size_align.empty()) return l;
|
||||
|
||||
do {
|
||||
static constexpr size_t size_sz{5};
|
||||
f = metadata.find("Size:", f) + size_sz;
|
||||
|
||||
if (l <= f) return f;
|
||||
|
||||
auto size = std::strtoul(&metadata[f], nullptr, 10);
|
||||
|
||||
static constexpr size_t align_sz{6};
|
||||
f = metadata.find("Align:", f) + align_sz;
|
||||
|
||||
char* l{};
|
||||
auto align = std::strtoul(&metadata[f], &l, 10);
|
||||
|
||||
f += (l - &metadata[f]) + 1;
|
||||
|
||||
size_align.emplace_back(size, align);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
class comgr_metadata_node {
|
||||
public:
|
||||
amd_comgr_metadata_node_t node;
|
||||
bool active;
|
||||
comgr_metadata_node() : active(false) {}
|
||||
~comgr_metadata_node() {
|
||||
if(active)
|
||||
checkError(amd_comgr_destroy_metadata(node), "amd_comgr_destroy_metadata");
|
||||
}
|
||||
bool is_active() { return active; }
|
||||
void set_active(bool value) { active = value; }
|
||||
comgr_metadata_node(const comgr_metadata_node&) = delete;
|
||||
comgr_metadata_node(comgr_metadata_node&&) = delete;
|
||||
comgr_metadata_node& operator=(const comgr_metadata_node&) = delete;
|
||||
comgr_metadata_node& operator=(comgr_metadata_node&&) = delete;
|
||||
|
||||
};
|
||||
|
||||
class comgr_data {
|
||||
public:
|
||||
amd_comgr_data_t data;
|
||||
bool active;
|
||||
comgr_data() : active(false) {}
|
||||
~comgr_data() {
|
||||
if(active)
|
||||
checkError(amd_comgr_release_data(data), "amd_comgr_release_data");
|
||||
}
|
||||
bool is_active() { return active; }
|
||||
void set_active(bool value) { active = value; }
|
||||
comgr_data(const comgr_data&) = delete;
|
||||
comgr_data(comgr_data&&) = delete;
|
||||
comgr_data& operator=(const comgr_data&) = delete;
|
||||
comgr_data& operator=(comgr_data&&) = delete;
|
||||
|
||||
};
|
||||
|
||||
inline
|
||||
std::string lookup_keyword_value(
|
||||
amd_comgr_metadata_node_t& in_node,
|
||||
std::string keyword) {
|
||||
amd_comgr_status_t status;
|
||||
size_t value_size;
|
||||
comgr_metadata_node value_meta;
|
||||
|
||||
status = amd_comgr_metadata_lookup(in_node, keyword.c_str(), &value_meta.node);
|
||||
checkError(status, "amd_comgr_metadata_lookup");
|
||||
value_meta.set_active(true);
|
||||
status = amd_comgr_get_metadata_string(value_meta.node, &value_size, NULL);
|
||||
checkError(status, "amd_comgr_get_metadata_string");
|
||||
// Since value_size returns size with null terminator, we don't include for C++ string size
|
||||
value_size--;
|
||||
std::string value(value_size, '\0');
|
||||
status = amd_comgr_get_metadata_string(value_meta.node, &value_size, &value[0]);
|
||||
checkError(status, "amd_comgr_get_metadata_string");
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
inline
|
||||
void process_kernarg_metadata(
|
||||
amd_comgr_metadata_node_t blob_meta,
|
||||
std::unordered_map<
|
||||
std::string,
|
||||
std::vector<std::pair<std::size_t, std::size_t>>>& kernargs) {
|
||||
amd_comgr_status_t status;
|
||||
amd_comgr_metadata_kind_t mkindLookup;
|
||||
comgr_metadata_node kernelList;
|
||||
std::string kernel_name;
|
||||
size_t num_kernels = 0;
|
||||
size_t num_kern_args = 0;
|
||||
|
||||
// Kernels is a list of MAPS!!
|
||||
status = amd_comgr_metadata_lookup(blob_meta, "Kernels", &kernelList.node);
|
||||
// FIXME - few hip memset kernels are missing Kernels node
|
||||
if(status != AMD_COMGR_STATUS_SUCCESS)
|
||||
return;
|
||||
kernelList.set_active(true);
|
||||
|
||||
status = amd_comgr_get_metadata_kind(kernelList.node, &mkindLookup);
|
||||
if (mkindLookup != AMD_COMGR_METADATA_KIND_LIST) {
|
||||
hip_throw(std::runtime_error{"Lookup of Kernels didn't return a list\n"});
|
||||
}
|
||||
|
||||
status = amd_comgr_get_metadata_list_size(kernelList.node, &num_kernels);
|
||||
checkError(status, "amd_comgr_get_metadata_list_size");
|
||||
for (int i = 0; i < num_kernels; i++) {
|
||||
comgr_metadata_node kernelMap;
|
||||
status = amd_comgr_index_list_metadata(kernelList.node, i, &kernelMap.node);
|
||||
checkError(status, "amd_comgr_index_list_metadata");
|
||||
kernelMap.set_active(true);
|
||||
|
||||
kernel_name = std::move(lookup_keyword_value(kernelMap.node, "Name"));
|
||||
|
||||
// Check if this kernel was already processed
|
||||
if(!kernargs[kernel_name].empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
comgr_metadata_node kernArgList;
|
||||
status = amd_comgr_metadata_lookup(kernelMap.node, "Args", &kernArgList.node);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS ) {
|
||||
kernArgList.set_active(true);
|
||||
status = amd_comgr_get_metadata_kind(kernArgList.node, &mkindLookup);
|
||||
if (mkindLookup != AMD_COMGR_METADATA_KIND_LIST) {
|
||||
hip_throw(std::runtime_error{"Lookup of Args didn't return a list\n"});
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS ) {
|
||||
status = amd_comgr_get_metadata_list_size(kernArgList.node, &num_kern_args);
|
||||
checkError(status, "amd_comgr_get_metadata_list_size");
|
||||
for (int k_ar = 0; k_ar < num_kern_args; k_ar++) {
|
||||
comgr_metadata_node kernArgMap;
|
||||
status = amd_comgr_index_list_metadata(kernArgList.node, k_ar, &kernArgMap.node);
|
||||
checkError(status, "amd_comgr_index_list_metadata");
|
||||
kernArgMap.set_active(true);
|
||||
size_t k_arg_size, k_arg_align;
|
||||
|
||||
k_arg_size = std::stoul(lookup_keyword_value(kernArgMap.node, "Size"));
|
||||
k_arg_align = std::stoul(lookup_keyword_value(kernArgMap.node, "Align"));
|
||||
|
||||
// Save it into our kernargs
|
||||
kernargs[kernel_name].emplace_back(k_arg_size, k_arg_align);
|
||||
|
||||
} // end kernArgMap
|
||||
}
|
||||
} // end kernArgList
|
||||
} // end kernelMap
|
||||
} // end kernelList
|
||||
|
||||
inline
|
||||
void read_kernarg_metadata(
|
||||
std::string blob,
|
||||
ELFIO::elfio& reader,
|
||||
std::unordered_map<
|
||||
std::string,
|
||||
std::vector<std::pair<std::size_t, std::size_t>>>& kernargs) {
|
||||
// TODO: this is inefficient.
|
||||
auto it = find_section_if(reader, [](const ELFIO::section* x) {
|
||||
return x->get_type() == SHT_NOTE;
|
||||
});
|
||||
|
||||
const char *blob_buf = blob.data();
|
||||
long blob_size = blob.size();
|
||||
if (!it) return;
|
||||
|
||||
amd_comgr_status_t status;
|
||||
comgr_data blob_data;
|
||||
status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_RELOCATABLE, &blob_data.data);
|
||||
checkError(status, "amd_comgr_create_data");
|
||||
blob_data.set_active(true);
|
||||
const ELFIO::note_section_accessor acc{reader, it};
|
||||
for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) {
|
||||
ELFIO::Elf_Word type{};
|
||||
std::string name{};
|
||||
void* desc{};
|
||||
ELFIO::Elf_Word desc_size{};
|
||||
|
||||
status = amd_comgr_set_data(blob_data.data, blob_size, blob_buf);
|
||||
if(status != AMD_COMGR_STATUS_SUCCESS)
|
||||
return;
|
||||
acc.get_note(i, type, name, desc, desc_size);
|
||||
|
||||
// We have a valid code object now
|
||||
status = amd_comgr_set_data_name(blob_data.data, "HIP Code Object");
|
||||
checkError(status, "amd_comgr_set_data_name");
|
||||
if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA.
|
||||
|
||||
comgr_metadata_node blob_meta;
|
||||
status = amd_comgr_get_data_metadata(blob_data.data, &blob_meta.node);
|
||||
checkError(status, "amd_comgr_get_data_metadata");
|
||||
blob_meta.set_active(true);
|
||||
std::string tmp{
|
||||
static_cast<char*>(desc), static_cast<char*>(desc) + desc_size};
|
||||
|
||||
// Root is a map
|
||||
amd_comgr_metadata_kind_t blob_mkind;
|
||||
status = amd_comgr_get_metadata_kind(blob_meta.node, &blob_mkind);
|
||||
checkError(status, "amd_comgr_get_metadata_kind");
|
||||
if (blob_mkind != AMD_COMGR_METADATA_KIND_MAP) {
|
||||
hip_throw(std::runtime_error{"Root is not map\n"});
|
||||
auto dx = tmp.find("Kernels:");
|
||||
|
||||
if (dx == std::string::npos) continue;
|
||||
|
||||
static constexpr decltype(tmp.size()) kernels_sz{8};
|
||||
dx += kernels_sz;
|
||||
|
||||
do {
|
||||
dx = tmp.find("Name:", dx);
|
||||
|
||||
if (dx == std::string::npos) break;
|
||||
|
||||
static constexpr decltype(tmp.size()) name_sz{5};
|
||||
dx = tmp.find_first_not_of(" '", dx + name_sz);
|
||||
|
||||
auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx);
|
||||
dx += fn.size();
|
||||
|
||||
auto dx1 = tmp.find("CodeProps", dx);
|
||||
dx = tmp.find("Args:", dx);
|
||||
|
||||
if (dx1 < dx) {
|
||||
dx = dx1;
|
||||
continue;
|
||||
}
|
||||
if (dx == std::string::npos) break;
|
||||
|
||||
static constexpr decltype(tmp.size()) args_sz{5};
|
||||
dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
process_kernarg_metadata(blob_meta.node, kernargs);
|
||||
}
|
||||
|
||||
inline
|
||||
@@ -753,7 +657,13 @@ const std::unordered_map<
|
||||
std::call_once(f, []() {
|
||||
for (auto&& isa_blobs : code_object_blobs()) {
|
||||
for (auto&& blob : isa_blobs.second) {
|
||||
read_kernarg_metadata(std::string{blob.cbegin(), blob.cend()}, r);
|
||||
std::stringstream tmp{std::string{blob.cbegin(), blob.cend()}};
|
||||
|
||||
ELFIO::elfio reader;
|
||||
|
||||
if (!reader.load(tmp)) continue;
|
||||
|
||||
read_kernarg_metadata(reader, r);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -30,9 +30,9 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM")
|
||||
set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm")
|
||||
if(@COMPILE_HIP_ATP_MARKER@)
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), rocm-profiler, comgr (>= 1.1)")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), rocm-profiler")
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), comgr (>= 1.1)")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@)")
|
||||
endif()
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
@@ -40,9 +40,9 @@ set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
|
||||
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
if(@COMPILE_HIP_ATP_MARKER@)
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler, comgr")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler")
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, comgr")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@")
|
||||
endif()
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
|
||||
Referens i nytt ärende
Block a user