From 8d90b44a1b73454f9ccda6790fb3153aeff52880 Mon Sep 17 00:00:00 2001 From: "Sang, Tao" Date: Thu, 27 Mar 2025 10:43:51 -0400 Subject: [PATCH] SWDEV-508863 - Support generic target in compressed fatbin (#44) --- hipamd/src/hip_code_object.cpp | 162 ++++++++++++++++++++------------ hipamd/src/hip_code_object.hpp | 4 + hipamd/src/hip_comgr_helper.cpp | 84 +++++++++-------- hipamd/src/hip_comgr_helper.hpp | 7 ++ 4 files changed, 158 insertions(+), 99 deletions(-) diff --git a/hipamd/src/hip_code_object.cpp b/hipamd/src/hip_code_object.cpp index bb18f45e16..c4d2cc6f24 100644 --- a/hipamd/src/hip_code_object.cpp +++ b/hipamd/src/hip_code_object.cpp @@ -31,6 +31,8 @@ THE SOFTWARE. #include "platform/program.hpp" #include #include "comgrctx.hpp" +#include "hip_comgr_helper.hpp" + namespace hip { hipError_t ihipFree(void* ptr); // forward declaration of methods required for managed variables @@ -445,48 +447,6 @@ static bool consume(std::string& input, std::string consume_) { return true; } -// Is agent target compatible with generic code object target? 
-static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) { - // The map is subject to change per removing policy - static std::map genericTargetMap{ - // "gfx9-generic" - {"gfx900", "gfx9-generic"}, - {"gfx902", "gfx9-generic"}, - {"gfx904", "gfx9-generic"}, - {"gfx906", "gfx9-generic"}, - {"gfx909", "gfx9-generic"}, - {"gfx90c", "gfx9-generic"}, - // "gfx9-4-generic" - {"gfx942", "gfx9-4-generic"}, - {"gfx950", "gfx9-4-generic"}, - // "gfx10-1-generic" - {"gfx1010", "gfx10-1-generic"}, - {"gfx1011", "gfx10-1-generic"}, - {"gfx1012", "gfx10-1-generic"}, - {"gfx1013", "gfx10-1-generic"}, - // "gfx10-3-generic" - {"gfx1030", "gfx10-3-generic"}, - {"gfx1031", "gfx10-3-generic"}, - {"gfx1032", "gfx10-3-generic"}, - {"gfx1033", "gfx10-3-generic"}, - {"gfx1034", "gfx10-3-generic"}, - {"gfx1035", "gfx10-3-generic"}, - {"gfx1036", "gfx10-3-generic"}, - // "gfx11-generic" - {"gfx1100", "gfx11-generic"}, - {"gfx1101", "gfx11-generic"}, - {"gfx1102", "gfx11-generic"}, - {"gfx1103", "gfx11-generic"}, - {"gfx1150", "gfx11-generic"}, - {"gfx1151", "gfx11-generic"}, - // "gfx12-generic" - {"gfx1200", "gfx12-generic"}, - {"gfx1201", "gfx12-generic"}, - }; - auto search = genericTargetMap.find(agentTarget); - return search != genericTargetMap.end() && coTarget == search->second; -} - // Trim String till character, will be used to get gpuname // example: input is gfx908:sram-ecc+ and trim char is : // input will become :sram-ecc+. 
@@ -582,7 +542,7 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, // Check for compatibility if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { // co_processor is generic target - if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor)) + if (!helpers::IsCompatibleWithGenericTarget(co_processor, agent_isa_processor)) return false; } else if (agent_isa_processor != co_processor) { return false; @@ -593,7 +553,24 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, if (co_xnack != ' ') { if (co_xnack != isa_xnack) return false; } + return true; +} +bool CodeObject::QueryGenericTarget(std::string agentTarget, std::string& processor, + char& sram_ecc, char& xnack) { + static const std::string head = std::string(kAmdgcnTargetTriple) + '-'; + // Parse agent isa triple target id + if (!consume(agentTarget, head)) { + return false; + } + if (!getTargetIDValue(agentTarget, processor, sram_ecc, xnack)) { + return false; + } + if (processor.empty()) return false; + auto &map = helpers::GenericTargetMapping(); + auto search = map.find(processor); + if (search == map.end()) return false; + processor = head + search->second; return true; } @@ -667,7 +644,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( bundleEntryId.c_str(), co_triple_target_id.c_str(), genericVersion); for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { - if (code_objs[dev].first) { + if (code_objs[dev].first != nullptr) { if (!isGenericTarget(code_objs[dev].first)) { continue; // Specific target already found } else if(genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { @@ -759,14 +736,52 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( std::set devicesSet{}; // To make sure device is unique + std::set genericDevicesSet{}; // Used to record generic targets + std::vector bundleEntryIDs{}; static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix for 
(size_t i = 0; i < num_devices; i++) { - devicesSet.insert(hipv4 + agent_triple_target_ids[i]); - } - - for (auto& device : devicesSet) { - bundleEntryIDs.push_back(device.c_str()); + auto res = devicesSet.insert(hipv4 + agent_triple_target_ids[i]); + if (res.second) { + // This is a new device in devicesSet + bundleEntryIDs.push_back(res.first->c_str()); + std::string processor; + char sram_ecc = ' ', xnack = ' '; + if (!QueryGenericTarget(agent_triple_target_ids[i], processor, sram_ecc, xnack)) { + continue; // No generic target for this device + } + // Now processor is generic such as + // amdgcn-amd-amdhsa--gfx9-4-generic, amdgcn-amd-amdhsa--gfx11-generic + processor = hipv4 + processor; + auto ret = genericDevicesSet.insert(processor); + if (ret.second) { + // Without feature + bundleEntryIDs.push_back(ret.first->c_str()); + } + if (xnack != ' ') { + ret = genericDevicesSet.insert(processor + ":xnack" + xnack); + if (ret.second) { + // Generic target with xnack feature + bundleEntryIDs.push_back(ret.first->c_str()); + } + } + if (sram_ecc != ' ') { + processor += ":sramecc"; + processor += sram_ecc; + ret = genericDevicesSet.insert(processor); + if (ret.second) { + // Generic target with sramecc feature + bundleEntryIDs.push_back(ret.first->c_str()); + } + if (xnack != ' ') { + ret = genericDevicesSet.insert(processor + ":xnack" + xnack); + if (ret.second) { + // Generic target with sramecc and xnack features + bundleEntryIDs.push_back(ret.first->c_str()); + } + } + } + } } do { @@ -898,6 +913,8 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( hipStatus = hipErrorInvalidValue; break; } + ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "Found bundleEntryId=%s", bundleEntryId.c_str()); + // Remove bundleEntryId_ if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) { // This is behavour in comgr unbundling which is subject to change. 
@@ -908,13 +925,22 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( } trimNameTail(bundleEntryId, '.'); // Remove .fileExtention + // Currently we only support EF_AMDGPU_GENERIC_VERSION_MIN on generic target + uint32_t genericVersion = + bundleEntryId.find("generic") != bundleEntryId.npos ? EF_AMDGPU_GENERIC_VERSION_MIN : 0; char* itemData = nullptr; for (size_t dev = 0; dev < num_devices; ++dev) { - if (code_objs[dev].first) continue; - // LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev, - // agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str()); - - if (bundleEntryId == agent_triple_target_ids[dev]) { + if (code_objs[dev].first != nullptr) { + if (!isGenericTarget(code_objs[dev].first)) { + continue; // Specific target already found + } else if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { + continue; // Generic target already found, no need to check another generic + } + } + ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", + dev, agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str()); + if (isCodeObjectCompatibleWithDevice(bundleEntryId, agent_triple_target_ids[dev], + genericVersion)) { if (itemData == nullptr) { itemSize = 0; comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr); @@ -924,7 +950,6 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( hipStatus = hipErrorInvalidValue; break; } - if (itemSize == 0) { // If there isn't a code object for this device, // amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with @@ -932,9 +957,8 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( ClPrint(amd::LOG_INFO, amd::LOG_COMGR, "amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev, agent_triple_target_ids[dev].c_str()); - continue; + break; } - // itemData should be deleted in fatbin's destructor itemData = new char[itemSize]; if (itemData == nullptr) { @@ -952,13 +976,29 @@ 
hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( break; } } + if (code_objs[dev].first != nullptr) { + // This must be data of generic target + bool used = false; // Still used by other devices? + for (size_t i = 0; i < num_devices; ++i) { + if (dev != i && code_objs[dev].first == code_objs[i].first) { + used = true; + break; + } + } + if (!used) { + delete[] reinterpret_cast(code_objs[dev].first); + } + } else { + --num_code_objs; + } code_objs[dev] = std::make_pair(reinterpret_cast(itemData), itemSize); - --num_code_objs; - ClPrint(amd::LOG_INFO, amd::LOG_COMGR, - "Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), " + ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, + "Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s, %s), " "Size=%zu, num_code_objs=%zu", dev, agent_triple_target_ids[dev].c_str(), itemData, - isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs); + isCompressed ? "compressed" : "uncompressed", + genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN ? "generic" : "non-generic", + itemSize, num_code_objs); } } diff --git a/hipamd/src/hip_code_object.hpp b/hipamd/src/hip_code_object.hpp index 7a4e6e74e7..84296ab029 100644 --- a/hipamd/src/hip_code_object.hpp +++ b/hipamd/src/hip_code_object.hpp @@ -95,6 +95,10 @@ class CodeObject { const void* data, size_t size, const std::vector& devices, std::vector>& code_objs); + // Query the generic target of agent target. 
+ // Return true on successful query, false on failure + static bool QueryGenericTarget(std::string agentTarget, std::string& processor, + char& sram_ecc, char& xnack); protected: //Given an ptr to image or file, extracts to code object //for corresponding devices diff --git a/hipamd/src/hip_comgr_helper.cpp b/hipamd/src/hip_comgr_helper.cpp index 755875c287..e847356d81 100644 --- a/hipamd/src/hip_comgr_helper.cpp +++ b/hipamd/src/hip_comgr_helper.cpp @@ -373,42 +373,6 @@ static bool consume(std::string& input, std::string consume_) { return true; } -// Is agent target compatible with generic code object target? -static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) { - // The map is subject to change per removing policy - static std::map genericTargetMap{ - // "gfx9-generic" - {"gfx900", "gfx9-generic"}, - {"gfx902", "gfx9-generic"}, - {"gfx904", "gfx9-generic"}, - {"gfx906", "gfx9-generic"}, - {"gfx909", "gfx9-generic"}, - {"gfx90c", "gfx9-generic"}, - // "gfx10-1-generic" - {"gfx1010", "gfx10-1-generic"}, - {"gfx1011", "gfx10-1-generic"}, - {"gfx1012", "gfx10-1-generic"}, - {"gfx1013", "gfx10-1-generic"}, - // "gfx10-3-generic" - {"gfx1030", "gfx10-3-generic"}, - {"gfx1031", "gfx10-3-generic"}, - {"gfx1032", "gfx10-3-generic"}, - {"gfx1033", "gfx10-3-generic"}, - {"gfx1034", "gfx10-3-generic"}, - {"gfx1035", "gfx10-3-generic"}, - {"gfx1036", "gfx10-3-generic"}, - // "gfx11-generic" - {"gfx1100", "gfx11-generic"}, - {"gfx1101", "gfx11-generic"}, - {"gfx1102", "gfx11-generic"}, - {"gfx1103", "gfx11-generic"}, - {"gfx1150", "gfx11-generic"}, - {"gfx1151", "gfx11-generic"}, - }; - auto search = genericTargetMap.find(agentTarget); - return search != genericTargetMap.end() && coTarget == search->second; -} - // Trim String till character, will be used to get gpuname // example: input is gfx908:sram-ecc+ and trim char is : // input will become sram-ecc+.
@@ -494,7 +458,7 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, // Check for compatibility if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { // co_processor is generic target - if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor)) + if (!IsCompatibleWithGenericTarget(co_processor, agent_isa_processor)) return false; } else if (agent_isa_processor != co_processor) { return false; @@ -1244,6 +1208,51 @@ bool fillMangledNames(std::vector& dataVec, std::map& GenericTargetMapping() { + // The map is subject to change per removing policy + static const std::map genericTargetMap{ + // "gfx9-generic" + {"gfx900", "gfx9-generic"}, + {"gfx902", "gfx9-generic"}, + {"gfx904", "gfx9-generic"}, + {"gfx906", "gfx9-generic"}, + {"gfx909", "gfx9-generic"}, + {"gfx90c", "gfx9-generic"}, + // "gfx9-4-generic" + {"gfx942", "gfx9-4-generic"}, + {"gfx950", "gfx9-4-generic"}, + // "gfx10-1-generic" + {"gfx1010", "gfx10-1-generic"}, + {"gfx1011", "gfx10-1-generic"}, + {"gfx1012", "gfx10-1-generic"}, + {"gfx1013", "gfx10-1-generic"}, + // "gfx10-3-generic" + {"gfx1030", "gfx10-3-generic"}, + {"gfx1031", "gfx10-3-generic"}, + {"gfx1032", "gfx10-3-generic"}, + {"gfx1033", "gfx10-3-generic"}, + {"gfx1034", "gfx10-3-generic"}, + {"gfx1035", "gfx10-3-generic"}, + {"gfx1036", "gfx10-3-generic"}, + // "gfx11-generic" + {"gfx1100", "gfx11-generic"}, + {"gfx1101", "gfx11-generic"}, + {"gfx1102", "gfx11-generic"}, + {"gfx1103", "gfx11-generic"}, + {"gfx1150", "gfx11-generic"}, + {"gfx1151", "gfx11-generic"}, + // "gfx12-generic" + {"gfx1200", "gfx12-generic"}, + {"gfx1201", "gfx12-generic"}, + }; + return genericTargetMap; +} + +bool IsCompatibleWithGenericTarget(const std::string& coTarget, const std::string& agentTarget) { + auto& map = GenericTargetMapping(); + auto search = map.find(agentTarget); + return search != map.end() && coTarget == search->second; +} } // namespace helpers std::vector getLinkOptions(const LinkArguments& args) { @@ -1266,7 
+1275,6 @@ std::vector getLinkOptions(const LinkArguments& args) { return res; } - // RTC Program Member Functions RTCProgram::RTCProgram(std::string name) : name_(name) { constexpr bool kComgrVersioned = true; diff --git a/hipamd/src/hip_comgr_helper.hpp b/hipamd/src/hip_comgr_helper.hpp index 98fc72bb7f..260656a715 100644 --- a/hipamd/src/hip_comgr_helper.hpp +++ b/hipamd/src/hip_comgr_helper.hpp @@ -74,6 +74,13 @@ bool UnbundleUsingComgr(std::vector& source, const std::string& isa, std::vector& linkOptions, std::string& buildLog, std::vector& unbundled_spirv_bitcode, const char* bundleEntryIDs, size_t bundleEntryIDsCount); + +// Mapping from targets to generic targets +const std::map& GenericTargetMapping(); + +// Return true if the agent target is compatible with the generic code object target, false otherwise. +// Neither target may include any feature. +bool IsCompatibleWithGenericTarget(const std::string& coTarget, const std::string& agentTarget); } // namespace helpers struct LinkArguments {