SWDEV-508863 - Support generic target in compressed fatbin (#44)

This commit is contained in:
Sang, Tao
2025-03-27 10:43:51 -04:00
committed by GitHub
parent 686dd56a4e
commit 8d90b44a1b
4 changed files with 158 additions and 99 deletions
+101 -61
View File
@@ -31,6 +31,8 @@ THE SOFTWARE.
#include "platform/program.hpp"
#include <elf/elf.hpp>
#include "comgrctx.hpp"
#include "hip_comgr_helper.hpp"
namespace hip {
hipError_t ihipFree(void* ptr);
// forward declaration of methods required for managed variables
@@ -445,48 +447,6 @@ static bool consume(std::string& input, std::string consume_) {
return true;
}
// Is agent target compatible with generic code object target?
//
// coTarget    - processor name carried by the code object, e.g. "gfx9-generic".
// agentTarget - processor name of the agent (device), e.g. "gfx900".
//
// Returns true only when agentTarget appears in the table below and the generic
// family recorded for it is exactly coTarget. An agent that is not listed never
// matches any generic target.
//
// Both arguments are read-only, so they are taken by const reference; this also
// lets callers pass const strings, temporaries or string literals.
static bool isCompatibleWithGenericTarget(const std::string& coTarget,
                                          const std::string& agentTarget) {
  // Specific processor -> generic family it belongs to. The table is immutable.
  // The map is subject to change per removing policy
  static const std::map<std::string, std::string> genericTargetMap{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx9-4-generic"
      {"gfx942", "gfx9-4-generic"},
      {"gfx950", "gfx9-4-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic"
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };
  const auto search = genericTargetMap.find(agentTarget);
  return search != genericTargetMap.end() && coTarget == search->second;
}
// Trim String till character, will be used to get gpuname
// example: input is gfx908:sram-ecc+ and trim char is :
// input will become :sram-ecc+.
@@ -582,7 +542,7 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
// Check for compatibility
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
// co_processor is generic target
if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor))
if (!helpers::IsCompatibleWithGenericTarget(co_processor, agent_isa_processor))
return false;
} else if (agent_isa_processor != co_processor) {
return false;
@@ -593,7 +553,24 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
if (co_xnack != ' ') {
if (co_xnack != isa_xnack) return false;
}
return true;
}
// Look up the generic target that corresponds to an agent's triple target id.
//
// agentTarget - agent triple target id; taken by value because it is consumed
//               while being parsed.
// processor   - out: on success, the triple prefix (kAmdgcnTargetTriple + '-')
//               followed by the generic target name found in
//               helpers::GenericTargetMapping().
// sram_ecc    - out: filled in by getTargetIDValue() (presumably the sramecc
//               feature setting of the agent - confirm against that helper).
// xnack       - out: filled in by getTargetIDValue() (presumably the xnack
//               feature setting of the agent - confirm against that helper).
//
// Returns false when the prefix is missing, the target id cannot be parsed, the
// parsed processor is empty, or the processor has no entry in the mapping. The
// out-parameters may already have been written by getTargetIDValue() in the
// last two cases.
bool CodeObject::QueryGenericTarget(std::string agentTarget, std::string& processor,
                                    char& sram_ecc, char& xnack) {
  static const std::string triplePrefix = std::string(kAmdgcnTargetTriple) + '-';

  // Strip the triple prefix first, then parse what is left of the target id.
  // Short-circuit evaluation keeps the two steps in their original order.
  const bool parsed = consume(agentTarget, triplePrefix) &&
                      getTargetIDValue(agentTarget, processor, sram_ecc, xnack);
  if (!parsed || processor.empty()) {
    return false;
  }

  auto& genericTargets = helpers::GenericTargetMapping();
  auto entry = genericTargets.find(processor);
  if (entry != genericTargets.end()) {
    // Replace the specific processor with its prefixed generic counterpart.
    processor = triplePrefix + entry->second;
    return true;
  }
  return false;
}
@@ -667,7 +644,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
bundleEntryId.c_str(), co_triple_target_id.c_str(), genericVersion);
for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
if (code_objs[dev].first) {
if (code_objs[dev].first != nullptr) {
if (!isGenericTarget(code_objs[dev].first)) {
continue; // Specific target already found
} else if(genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
@@ -759,14 +736,52 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
std::set<std::string> devicesSet{}; // To make sure device is unique
std::set<std::string> genericDevicesSet{}; // Used to record generic targets
std::vector<const char*> bundleEntryIDs{};
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
for (size_t i = 0; i < num_devices; i++) {
devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
}
for (auto& device : devicesSet) {
bundleEntryIDs.push_back(device.c_str());
auto res = devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
if (res.second) {
// This is a new device in devicesSet
bundleEntryIDs.push_back(res.first->c_str());
std::string processor;
char sram_ecc = ' ', xnack = ' ';
if (!QueryGenericTarget(agent_triple_target_ids[i], processor, sram_ecc, xnack)) {
continue; // No generic target for this device
}
// Now processor is generic such as
// amdgcn-amd-amdhsa--gfx9-4-generic, amdgcn-amd-amdhsa--gfx11-generic
processor = hipv4 + processor;
auto ret = genericDevicesSet.insert(processor);
if (ret.second) {
// Without feature
bundleEntryIDs.push_back(ret.first->c_str());
}
if (xnack != ' ') {
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
if (ret.second) {
// Generic target with xnack feature
bundleEntryIDs.push_back(ret.first->c_str());
}
}
if (sram_ecc != ' ') {
processor += ":sramecc";
processor += sram_ecc;
ret = genericDevicesSet.insert(processor);
if (ret.second) {
// Generic target with sramecc feature
bundleEntryIDs.push_back(ret.first->c_str());
}
if (xnack != ' ') {
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
if (ret.second) {
// Generic target with sramecc and xnack features
bundleEntryIDs.push_back(ret.first->c_str());
}
}
}
}
}
do {
@@ -898,6 +913,8 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
hipStatus = hipErrorInvalidValue;
break;
}
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "Found bundleEntryId=%s", bundleEntryId.c_str());
// Remove bundleEntryId_
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
// This is the behavior of comgr unbundling, which is subject to change.
@@ -908,13 +925,22 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
}
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
// Currently we only support EF_AMDGPU_GENERIC_VERSION_MIN on generic target
uint32_t genericVersion =
bundleEntryId.find("generic") != bundleEntryId.npos ? EF_AMDGPU_GENERIC_VERSION_MIN : 0;
char* itemData = nullptr;
for (size_t dev = 0; dev < num_devices; ++dev) {
if (code_objs[dev].first) continue;
// LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev,
// agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
if (bundleEntryId == agent_triple_target_ids[dev]) {
if (code_objs[dev].first != nullptr) {
if (!isGenericTarget(code_objs[dev].first)) {
continue; // Specific target already found
} else if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
continue; // Generic target already found, no need to check another generic
}
}
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "agent_triple_target_ids[%zu]=%s, bundleEntryId=%s",
dev, agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
if (isCodeObjectCompatibleWithDevice(bundleEntryId, agent_triple_target_ids[dev],
genericVersion)) {
if (itemData == nullptr) {
itemSize = 0;
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
@@ -924,7 +950,6 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
hipStatus = hipErrorInvalidValue;
break;
}
if (itemSize == 0) {
// If there isn't a code object for this device,
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
@@ -932,9 +957,8 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev,
agent_triple_target_ids[dev].c_str());
continue;
break;
}
// itemData should be deleted in fatbin's destructor
itemData = new char[itemSize];
if (itemData == nullptr) {
@@ -952,13 +976,29 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
break;
}
}
if (code_objs[dev].first != nullptr) {
// This must be data of generic target
bool used = false; // Still used by other devices?
for (size_t i = 0; i < num_devices; ++i) {
if (dev != i && code_objs[dev].first == code_objs[i].first) {
used = true;
break;
}
}
if (!used) {
delete[] reinterpret_cast<const char*>(code_objs[dev].first);
}
} else {
--num_code_objs;
}
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
--num_code_objs;
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), "
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR,
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s, %s), "
"Size=%zu, num_code_objs=%zu",
dev, agent_triple_target_ids[dev].c_str(), itemData,
isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs);
isCompressed ? "compressed" : "uncompressed",
genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN ? "generic" : "non-generic",
itemSize, num_code_objs);
}
}