diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp index 1a3963af84..a5a3fb5861 100644 --- a/projects/clr/hipamd/src/hip_code_object.cpp +++ b/projects/clr/hipamd/src/hip_code_object.cpp @@ -30,21 +30,32 @@ THE SOFTWARE. #include "hip_internal.hpp" #include "platform/program.hpp" #include +#include "comgrctx.hpp" namespace hip { hipError_t ihipFree(void* ptr); // forward declaration of methods required for managed variables hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); namespace { -constexpr char kOffloadBundleMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__"; +// In uncompressed mode +constexpr char kOffloadBundleUncompressedMagicStr[] = "__CLANG_OFFLOAD_BUNDLE__"; +static constexpr size_t kOffloadBundleUncompressedMagicStrSize = + sizeof(kOffloadBundleUncompressedMagicStr); + +// In compressed mode +constexpr char kOffloadBundleCompressedMagicStr[] = "CCOB"; +static constexpr size_t kOffloadBundleCompressedMagicStrSize = + sizeof(kOffloadBundleCompressedMagicStr); + constexpr char kOffloadKindHip[] = "hip"; constexpr char kOffloadKindHipv4[] = "hipv4"; constexpr char kOffloadKindHcc[] = "hcc"; constexpr char kAmdgcnTargetTriple[] = "amdgcn-amd-amdhsa-"; +constexpr char kHipFatBinName[] = "hipfatbin"; +constexpr char kHipFatBinName_[] = "hipfatbin-"; +constexpr char kOffloadKindHipv4_[] = "hipv4-"; // bundled code objects need the prefix +constexpr char kOffloadHipV4FatBinName_[] = "hipfatbin-hipv4-"; -// ClangOFFLOADBundle info. -static constexpr size_t kOffloadBundleMagicStrSize = sizeof(kOffloadBundleMagicStr); - -// Clang Offload bundler description & Header. +// Clang Offload bundler description & Header in uncompressed mode. struct __ClangOffloadBundleInfo { uint64_t offset; uint64_t size; @@ -52,16 +63,38 @@ struct __ClangOffloadBundleInfo { const char bundleEntryId[1]; }; -struct __ClangOffloadBundleHeader { - const char magic[kOffloadBundleMagicStrSize - 1]; +struct __ClangOffloadBundleUncompressedHeader { + const char magic[kOffloadBundleUncompressedMagicStrSize - 1]; uint64_t numOfCodeObjects; __ClangOffloadBundleInfo desc[1]; }; + +// Clang Offload bundler description & Header in compressed mode. +struct __ClangOffloadBundleCompressedHeader { + const char magic[kOffloadBundleCompressedMagicStrSize - 1]; + uint16_t versionNumber; + uint16_t compressionMethod; + uint32_t totalSize; + uint32_t uncompressedBinarySize; + uint64_t Hash; + const char compressedBinarydesc[1]; +}; } // namespace -bool CodeObject::IsClangOffloadMagicBundle(const void* data) { - std::string magic(reinterpret_cast(data), kOffloadBundleMagicStrSize - 1); - return magic.compare(kOffloadBundleMagicStr) ? false : true; +bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) { + std::string magic(reinterpret_cast(data), + kOffloadBundleUncompressedMagicStrSize - 1); + if (!magic.compare(kOffloadBundleUncompressedMagicStr)) { + isCompressed = false; + return true; + } + std::string magic1(reinterpret_cast(data), + kOffloadBundleCompressedMagicStrSize - 1); + if (!magic1.compare(kOffloadBundleCompressedMagicStr)) { + isCompressed = true; + return true; + } + return false; } uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } @@ -356,7 +389,7 @@ static bool consume(std::string& input, std::string consume_) { // Trim String till character, will be used to get gpuname // example: input is gfx908:sram-ecc+ and trim char is : -// input will become sram-ecc+. +// input will become :sram-ecc+. static std::string trimName(std::string& input, char trim) { auto pos_ = input.find(trim); auto res = input; @@ -369,6 +402,18 @@ static std::string trimName(std::string& input, char trim) { return res; } +// Trim String till character, will be used to get bundle entry ID. +// example: input is amdgcn-amd-amdhsa--gfx1035.bc and trim char is . +// input will become amdgcn-amd-amdhsa--gfx1035 +static bool trimNameTail(std::string& input, char trim) { + auto pos_ = input.rfind(trim); + if (pos_ == std::string::npos) { + return false; + } + input = input.substr(0, pos_); + return true; +} + static char getFeatureValue(std::string& input, std::string feature) { char res = ' '; if (consume(input, std::move(feature))) { @@ -483,8 +528,8 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory( hipError_t CodeObject::extractCodeObjectFromFatBinary( const void* data, const std::vector& agent_triple_target_ids, std::vector>& code_objs) { - std::string magic((const char*)data, kOffloadBundleMagicStrSize); - if (magic.compare(kOffloadBundleMagicStr)) { + std::string magic((const char*)data, kOffloadBundleUncompressedMagicStrSize); + if (magic.compare(kOffloadBundleUncompressedMagicStr)) { return hipErrorInvalidKernelFile; } @@ -494,7 +539,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( code_objs.push_back(std::make_pair(nullptr, 0)); } - const auto obheader = reinterpret_cast(data); + const auto obheader = reinterpret_cast(data); const auto* desc = &obheader->desc[0]; size_t num_code_objs = code_objs.size(); for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, @@ -529,7 +574,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(), ((code_objs[i].first) ? "Found" : "Not Found")); } - const auto obheader = reinterpret_cast(data); + const auto obheader = reinterpret_cast(data); const auto* desc = &obheader->desc[0]; LogPrintfError("%s", " Bundled Code Objects:"); for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, @@ -554,6 +599,343 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( } } +// ================================================================================================ +size_t CodeObject::getFatbinSize(const void* data, const bool isCompressed) { + if (isCompressed) { + const auto obheader = reinterpret_cast(data); + return obheader->totalSize; + } else { + const auto obheader = reinterpret_cast(data); + const __ClangOffloadBundleInfo* desc = &obheader->desc[0]; + uint64_t i = 0; + while (++i < obheader->numOfCodeObjects) { + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize); + } + return desc->offset + desc->size; + } +} + +// ================================================================================================ +hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( + const void* data, size_t size, const std::vector& agent_triple_target_ids, + std::vector>& code_objs) { + hipError_t hipStatus = hipSuccess; + amd_comgr_status_t comgrStatus = AMD_COMGR_STATUS_SUCCESS; + + const size_t num_devices = agent_triple_target_ids.size(); + size_t num_code_objs = num_devices; + bool isCompressed = false; + if (!IsClangOffloadMagicBundle(data, isCompressed)) { + LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false", data); + // hipModuleLoadData() will possibly call here + return hipErrorInvalidKernelFile; + } + + if (size == 0) size = getFatbinSize(data, isCompressed); + + amd_comgr_data_t dataCodeObj{0}; + amd_comgr_data_set_t dataSetBundled{0}; + amd_comgr_data_set_t dataSetUnbundled{0}; + amd_comgr_action_info_t actionInfoUnbundle{0}; + amd_comgr_data_t item{0}; + + + std::set devicesSet{}; // To make sure device is unique + std::vector bundleEntryIDs{}; + static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix + for (size_t i = 0; i < num_devices; i++) { + devicesSet.insert(hipv4 + agent_triple_target_ids[i]); + } + + for (auto& device : devicesSet) { + bundleEntryIDs.push_back(device.c_str()); + } + + do { + // Create Bundled dataset + comgrStatus = amd::Comgr::create_data_set(&dataSetBundled); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::create_data_set() failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + // CodeObject + comgrStatus = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE, &dataCodeObj); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError( + "amd::Comgr::create_data(AMD_COMGR_DATA_KIND_OBJ_BUNDLE) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + comgrStatus = amd::Comgr::set_data(dataCodeObj, size, static_cast(data)); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::set_data(size=%zu, data=%p) failed with status 0x%xh", size, data, + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + comgrStatus = amd::Comgr::set_data_name(dataCodeObj, kHipFatBinName); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError( + "amd::Comgr::set_data_name(" + ") failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + comgrStatus = amd::Comgr::data_set_add(dataSetBundled, dataCodeObj); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::data_set_add() failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + // Set up ActionInfo + comgrStatus = amd::Comgr::create_action_info(&actionInfoUnbundle); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::create_action_info() failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + comgrStatus = amd::Comgr::action_info_set_language(actionInfoUnbundle, AMD_COMGR_LANGUAGE_HIP); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::action_info_set_language(HIP) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + comgrStatus = amd::Comgr::action_info_set_bundle_entry_ids( + actionInfoUnbundle, bundleEntryIDs.data(), bundleEntryIDs.size()); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError( + "amd::Comgr::action_info_set_bundle_entry_ids(%p, %zu) failed with status 0x%xh", + bundleEntryIDs.data(), bundleEntryIDs.size(), comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + // Unbundle + comgrStatus = amd::Comgr::create_data_set(&dataSetUnbundled); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::create_data_set(&dataSetUnbundled) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + comgrStatus = amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE, actionInfoUnbundle, + dataSetBundled, dataSetUnbundled); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + // Check CodeObject count + size_t count = 0; + comgrStatus = + amd::Comgr::action_data_count(dataSetUnbundled, AMD_COMGR_DATA_KIND_EXECUTABLE, &count); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::action_data_count() failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + // Initialize Code objects + code_objs.reserve(num_code_objs); + for (size_t i = 0; i < num_code_objs; i++) { + code_objs.push_back(std::make_pair(nullptr, 0)); + } + + for (size_t i = 0; i < count; i++) { + if (num_code_objs == 0) break; + + size_t itemSize = 0; + comgrStatus = amd::Comgr::action_data_get_data(dataSetUnbundled, + AMD_COMGR_DATA_KIND_EXECUTABLE, i, &item); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::action_data_get_data(%zu/%zu) failed with 0x%xh", i, count, + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + comgrStatus = amd::Comgr::get_data_name(item, &itemSize, nullptr); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::get_data_name(%zu/%zu) failed with 0x%xh", i, count, + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + std::string bundleEntryId(itemSize, 0); + comgrStatus = amd::Comgr::get_data_name(item, &itemSize, bundleEntryId.data()); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::get_data_name(%zu/%zu, %d) failed with 0x%xh", i, count, + itemSize, comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + // Remove bundleEntryId_ + if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) { + // This is behavour in comgr unbundling which is subject to change. + // So just give info. + LogPrintfInfo("bundleEntryId=%s isn't prefixed with %s", bundleEntryId.c_str(), + kOffloadHipV4FatBinName_); + } + trimNameTail(bundleEntryId, '.'); // Remove .fileExtention + + char* itemData = nullptr; + for (size_t dev = 0; dev < num_devices; ++dev) { + if (code_objs[dev].first) continue; + // LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev, + // agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str()); + + if (bundleEntryId == agent_triple_target_ids[dev]) { + if (itemData == nullptr) { + itemSize = 0; + comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::get_data(%zu/%zu) failed with 0x%xh", i, count, + comgrStatus); + hipStatus = hipErrorInvalidValue; + break; + } + + if (itemSize == 0) { + // If there isn't a code object for this device, + // amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with + // valid name but no data. We need continue searching for other devices + LogPrintfInfo( + "amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev, + agent_triple_target_ids[dev].c_str()); + continue; + } + + // itemData should be deleted in fatbin's destructor + itemData = new char[itemSize]; + if (itemData == nullptr) { + LogError("no enough memory"); + hipStatus = hipErrorOutOfMemory; + break; + } + comgrStatus = amd::Comgr::get_data(item, &itemSize, itemData); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::get_data(%zu/%zu, %d) failed with 0x%xh", i, count, + itemSize, comgrStatus); + hipStatus = hipErrorInvalidValue; + delete[] itemData; + itemData = nullptr; + break; + } + } + code_objs[dev] = std::make_pair(reinterpret_cast(itemData), itemSize); + --num_code_objs; + LogPrintfInfo( + "Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), " + "Size=%zu, num_code_objs=%zu", + dev, agent_triple_target_ids[dev].c_str(), itemData, + isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs); + } + } + + comgrStatus = amd::Comgr::release_data(item); + item.handle = 0; + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + } + if (hipStatus != hipSuccess) break; + } + } while (0); + + if (hipStatus == hipSuccess && num_code_objs != 0) { + hipStatus = hipErrorNoBinaryForGpu; + + // Leave it for debug purpose in uncompressed mode. + if (!isCompressed) { + LogPrintfError("%s", + "hipErrorNoBinaryForGpu: Unable to find code object for all current devices!"); + LogPrintfError("%s", " Devices:"); + for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { + LogPrintfError(" %s - [%s]", agent_triple_target_ids[i].c_str(), + ((code_objs[i].first) ? "Found" : "Not Found")); + } + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + LogPrintfError("%s", " Bundled Code Objects:"); + for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + + desc->bundleEntryIdSize)) { + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + const void* image = + reinterpret_cast(reinterpret_cast(obheader) + desc->offset); + + std::string co_triple_target_id; + bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id); + + if (valid_co) { + LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(), + co_triple_target_id.c_str()); + } else { + LogPrintfError(" %s - [Unsupported]", bundleEntryId.c_str()); + } + } + } + } + + // Cleanup + if (actionInfoUnbundle.handle) { + comgrStatus = amd::Comgr::destroy_action_info(actionInfoUnbundle); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::destroy_action_info(actionInfoUnbundle) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + } + } + if (dataSetBundled.handle) { + comgrStatus = amd::Comgr::destroy_data_set(dataSetBundled); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::destroy_data_set(dataSetBundled) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + } + } + + if (dataSetUnbundled.handle) { + comgrStatus = amd::Comgr::destroy_data_set(dataSetUnbundled); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::destroy_data_set(dataSetUnbundled) failed with status 0x%xh", + comgrStatus); + hipStatus = hipErrorInvalidValue; + } + } + + if (dataCodeObj.handle) { + comgrStatus = amd::Comgr::release_data(dataCodeObj); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::release_data(dataCodeObj) failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + } + } + + if (item.handle) { + comgrStatus = amd::Comgr::release_data(item); + if (comgrStatus != AMD_COMGR_STATUS_SUCCESS) { + LogPrintfError("amd::Comgr::release_data(item) failed with status 0x%xh", comgrStatus); + hipStatus = hipErrorInvalidValue; + } + } + + return hipStatus; +} + hipError_t DynCO::loadCodeObject(const char* fname, const void* image) { amd::ScopedLock lock(dclock_); diff --git a/projects/clr/hipamd/src/hip_code_object.hpp b/projects/clr/hipamd/src/hip_code_object.hpp index c83ec37b24..f0407f7bd4 100644 --- a/projects/clr/hipamd/src/hip_code_object.hpp +++ b/projects/clr/hipamd/src/hip_code_object.hpp @@ -63,15 +63,39 @@ class CodeObject { static uint64_t ElfSize(const void* emi); - static bool IsClangOffloadMagicBundle(const void* data); + static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed); -protected: + // Return size of fat bin + static size_t getFatbinSize(const void* data, const bool isCompressed = false); + + /** + * @brief Extract code object from fatbin using comgr unbundling action + * + * @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed, + * compressed and even SPIR-V(to be supported later) mode. + * @param[in] size the size of the bundle data + * @param[in] agent_triple_target_ids isa names of concerned devices + * @param[out] code_objs the buffer address and size pairs of extracted code objects of + * concerned devices + * Returned error code + * + * @return #hipSuccess, #hipErrorInvalidKernelFile, #hipErrorInvalidValue, + * #hipErrorNoBinaryForGpu + * + * @see FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void* data, + * const std::vector& devices) + */ + static hipError_t extractCodeObjectFromFatBinaryUsingComgr( + const void* data, size_t size, const std::vector& devices, + std::vector>& code_objs); + + protected: //Given an ptr to image or file, extracts to code object //for corresponding devices static hipError_t extractCodeObjectFromFatBinary(const void*, const std::vector&, std::vector>&); - + CodeObject() {} private: friend const std::vector& modules(); diff --git a/projects/clr/hipamd/src/hip_fatbin.cpp b/projects/clr/hipamd/src/hip_fatbin.cpp index 562f193861..8b52e9f32f 100644 --- a/projects/clr/hipamd/src/hip_fatbin.cpp +++ b/projects/clr/hipamd/src/hip_fatbin.cpp @@ -25,6 +25,7 @@ THE SOFTWARE. #include #include "hip_code_object.hpp" #include "hip_platform.hpp" +#include "comgrctx.hpp" namespace hip { @@ -50,14 +51,21 @@ FatBinaryInfo::FatBinaryInfo(const char* fname, const void* image) : fdesc_(amd: } FatBinaryInfo::~FatBinaryInfo() { - + // Different devices in the same model have the same binary_image_ + std::set toDelete; // Release per device fat bin info. for (auto* fbd: fatbin_dev_info_) { if (fbd != nullptr) { + if (fbd->binary_image_ && fbd->binary_offset_ == 0 && fbd->binary_image_ != image_) { + toDelete.insert(fbd->binary_image_); + } delete fbd; } } - + for (auto itemData : toDelete) { + LogPrintfInfo("~FatBinaryInfo(%p) will delete binary_image_ %p", this, itemData); + delete[] reinterpret_cast(itemData); + } if (!HIP_USE_RUNTIME_UNBUNDLER) { // Using COMGR Unbundler if (ufd_ && amd::Os::isValidFileDesc(ufd_->fdesc_)) { @@ -163,9 +171,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectordeviceId()] = new FatBinaryDeviceInfo(image_, CodeObject::ElfSize(image_), 0); @@ -178,7 +186,22 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector= 2 && minor >= 8) { + hip_status = ExtractFatBinaryUsingCOMGR(image_, devices); + break; + } else if (isCompressed) { + LogPrintfError( + "comgr %zu.%zu cannot support commpressed mode which need comgr 2.8+", major, minor); + hip_status = hipErrorNotSupported; + break; + } else if (HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) { + HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION = false; + LogInfo("HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION = true only works on comgr 2.8+"); + } + } // Create a data object, if it fails return error if ((comgr_status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_FATBIN, &data_object)) != AMD_COMGR_STATUS_SUCCESS) { @@ -442,4 +465,63 @@ hipError_t FatBinaryInfo::BuildProgram(const int device_id) { return hipSuccess; } +// ================================================================================================ +hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const void *data, + const std::vector& devices) { + hipError_t hip_status = hipSuccess; + // At this line, image should be a valid ptr. + guarantee(data != nullptr, "Image cannot be nullptr"); + + do { + std::vector> code_objs; + // Copy device names + std::vector device_names; + device_names.reserve(devices.size()); + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); + } + + hip_status = CodeObject::extractCodeObjectFromFatBinaryUsingComgr(data, 0, + device_names, code_objs); + if (hip_status == hipErrorNoBinaryForGpu || hip_status == hipSuccess) { + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + if (code_objs[dev_idx].first) { + fatbin_dev_info_[devices[dev_idx]->deviceId()] = + new FatBinaryDeviceInfo(code_objs[dev_idx].first, code_objs[dev_idx].second, 0); + + fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = + new amd::Program(*devices[dev_idx]->asContext()); + if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == NULL) { + break; + } + } else { + // This is the case of hipErrorNoBinaryForGpu which will finally fail app on device + // without code object + LogPrintfError("Cannot find CO in the bundle %s for ISA: %s", fname_.c_str(), + device_names[dev_idx].c_str()); + } + } + } else if (hip_status == hipErrorInvalidKernelFile) { + hip_status = hipSuccess; + // If the image ptr is not clang offload bundle then just directly point the image. + for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { + fatbin_dev_info_[devices[dev_idx]->deviceId()] = + new FatBinaryDeviceInfo(data, CodeObject::ElfSize(data), 0); + fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ = + new amd::Program(*devices[dev_idx]->asContext()); + if (fatbin_dev_info_[devices[dev_idx]->deviceId()]->program_ == nullptr) { + hip_status = hipErrorOutOfMemory; + break; + } + } + } else { + LogPrintfError( + "CodeObject::extractCodeObjectFromFatBinaryUsingComgr failed with status %d\n", + hip_status); + } + } while (0); + + return hip_status; +} + } //namespace : hip diff --git a/projects/clr/hipamd/src/hip_fatbin.hpp b/projects/clr/hipamd/src/hip_fatbin.hpp index f9057a4b5a..5c4ea29761 100644 --- a/projects/clr/hipamd/src/hip_fatbin.hpp +++ b/projects/clr/hipamd/src/hip_fatbin.hpp @@ -65,6 +65,22 @@ public: // Loads Fat binary from file or image, unbundles COs for devices. hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices); + + /** + * @brief Extract code object from fatbin using comgr unbundling action via calling + * CodeObject::extractCodeObjectFromFatBinaryUsingComgr + * + * @param[in] data the bundle data(fatbin or loaded module data). It can be in uncompressed, + * compressed and even SPIR-V(to be supported later) mode. + * @param[in] devices devices whose code objects will be extracted. + * Returned error code + * + * @return #hipSuccess, #hipErrorNoBinaryForGpu, #hipErrorInvalidValue + * + * @see CodeObject::extractCodeObjectFromFatBinaryUsingComgr() + */ + hipError_t ExtractFatBinaryUsingCOMGR(const void* data, + const std::vector& devices); hipError_t ExtractFatBinary(const std::vector& devices); hipError_t AddDevProgram(const int device_id); hipError_t BuildProgram(const int device_id); diff --git a/projects/clr/rocclr/device/comgrctx.cpp b/projects/clr/rocclr/device/comgrctx.cpp index 1a8afe60a9..6068ddde5d 100644 --- a/projects/clr/rocclr/device/comgrctx.cpp +++ b/projects/clr/rocclr/device/comgrctx.cpp @@ -114,12 +114,16 @@ bool Comgr::LoadLib(bool is_versioned) { GET_COMGR_SYMBOL(amd_comgr_iterate_symbols) GET_COMGR_SYMBOL(amd_comgr_symbol_lookup) GET_COMGR_SYMBOL(amd_comgr_symbol_get_info) - GET_COMGR_OPTIONAL_SYMBOL(amd_comgr_demangle_symbol_name) + GET_COMGR_SYMBOL(amd_comgr_demangle_symbol_name) GET_COMGR_SYMBOL(amd_comgr_populate_mangled_names) GET_COMGR_SYMBOL(amd_comgr_get_mangled_name) GET_COMGR_SYMBOL(amd_comgr_populate_name_expression_map) GET_COMGR_SYMBOL(amd_comgr_map_name_expression_to_symbol_name) + GET_COMGR_OPTIONAL_SYMBOL(amd_comgr_action_info_set_bundle_entry_ids) is_ready_ = true; + size_t major = 0, minor = 0; + get_version(&major, &minor); + ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Loaded COMGR library version %zu.%zu.", major, minor); return true; } diff --git a/projects/clr/rocclr/device/comgrctx.hpp b/projects/clr/rocclr/device/comgrctx.hpp index d8dd5e17f5..547e16c01d 100644 --- a/projects/clr/rocclr/device/comgrctx.hpp +++ b/projects/clr/rocclr/device/comgrctx.hpp @@ -76,6 +76,7 @@ typedef amd_comgr_status_t (*t_amd_comgr_populate_mangled_names)(amd_comgr_data_ typedef amd_comgr_status_t (*t_amd_comgr_get_mangled_name)(amd_comgr_data_t data, size_t index, size_t *size, char *mangled_name); typedef amd_comgr_status_t (*t_amd_comgr_populate_name_expression_map)(amd_comgr_data_t data, size_t *count); typedef amd_comgr_status_t (*t_amd_comgr_map_name_expression_to_symbol_name)(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name); +typedef amd_comgr_status_t (*t_amd_comgr_action_info_set_bundle_entry_ids)(amd_comgr_action_info_t action_info, const char* bundle_entry_ids[], size_t count); struct ComgrEntryPoints { void* handle; @@ -129,13 +130,16 @@ struct ComgrEntryPoints { t_amd_comgr_get_mangled_name amd_comgr_get_mangled_name; t_amd_comgr_populate_name_expression_map amd_comgr_populate_name_expression_map; t_amd_comgr_map_name_expression_to_symbol_name amd_comgr_map_name_expression_to_symbol_name; + t_amd_comgr_action_info_set_bundle_entry_ids amd_comgr_action_info_set_bundle_entry_ids; }; #ifdef COMGR_DYN_DLL #define COMGR_DYN(NAME) cep_.NAME #define GET_COMGR_SYMBOL(NAME) cep_.NAME = \ reinterpret_cast(Os::getSymbol(cep_.handle, #NAME)); \ - if (nullptr == cep_.NAME) { return false; } + if (nullptr == cep_.NAME) { \ + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "Failed to load COMGR function %s", #NAME); \ + return false; } #define GET_COMGR_OPTIONAL_SYMBOL(NAME) cep_.NAME = \ reinterpret_cast(Os::getSymbol(cep_.handle, #NAME)); #else @@ -289,13 +293,6 @@ public: } static amd_comgr_status_t demangle_symbol_name(amd_comgr_data_t MangledSymbolName, amd_comgr_data_t* DemangledSymbolName) { -#if defined(COMGR_DYN_DLL) - if (cep_.amd_comgr_demangle_symbol_name == nullptr) { - ClPrint(amd::LOG_ERROR, amd::LOG_CODE, - "Failed to load COMGR function amd_comgr_demangle_symbol_name"); - return AMD_COMGR_STATUS_ERROR; - } -#endif return COMGR_DYN(amd_comgr_demangle_symbol_name)(MangledSymbolName, DemangledSymbolName); } static amd_comgr_status_t populate_mangled_names(amd_comgr_data_t data, size_t *count) { @@ -310,8 +307,19 @@ public: static amd_comgr_status_t map_name_expression_to_symbol_name(amd_comgr_data_t data, size_t *size, char *name_expression, char* symbol_name) { return COMGR_DYN(amd_comgr_map_name_expression_to_symbol_name)(data, size, name_expression, symbol_name); } - - + static amd_comgr_status_t action_info_set_bundle_entry_ids(amd_comgr_action_info_t action_info, + const char* bundle_entry_ids[], size_t count) { +#if defined(COMGR_DYN_DLL) + if (cep_.amd_comgr_action_info_set_bundle_entry_ids == nullptr) { + // comgr version 2.7 or less is loaded + ClPrint(amd::LOG_ERROR, amd::LOG_CODE, + "Failed to load COMGR function amd_comgr_action_info_set_bundle_entry_ids"); + return AMD_COMGR_STATUS_ERROR; + } +#endif + return COMGR_DYN(amd_comgr_action_info_set_bundle_entry_ids)(action_info, bundle_entry_ids, + count); + } private: static ComgrEntryPoints cep_; static bool is_ready_; diff --git a/projects/clr/rocclr/utils/flags.hpp b/projects/clr/rocclr/utils/flags.hpp index 6eee4e8e48..362fcb298b 100644 --- a/projects/clr/rocclr/utils/flags.hpp +++ b/projects/clr/rocclr/utils/flags.hpp @@ -249,6 +249,8 @@ release(bool, HIP_VMEM_MANAGE_SUPPORT, true, \ "Virtual Memory Management Support") \ release(bool, DEBUG_HIP_GRAPH_DOT_PRINT, false, \ "Enable/Disable graph debug dot print dump") \ +release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \ + "Force to always use new comgr unbundling action") \ namespace amd {