SWDEV-508863 - Support generic target in compressed fatbin (#44)
This commit is contained in:
@@ -31,6 +31,8 @@ THE SOFTWARE.
|
||||
#include "platform/program.hpp"
|
||||
#include <elf/elf.hpp>
|
||||
#include "comgrctx.hpp"
|
||||
#include "hip_comgr_helper.hpp"
|
||||
|
||||
namespace hip {
|
||||
hipError_t ihipFree(void* ptr);
|
||||
// forward declaration of methods required for managed variables
|
||||
@@ -445,48 +447,6 @@ static bool consume(std::string& input, std::string consume_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Is agent target compatible with generic code object target?
|
||||
static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) {
|
||||
// The map is subject to change per removing policy
|
||||
static std::map<std::string, std::string> genericTargetMap{
|
||||
// "gfx9-generic"
|
||||
{"gfx900", "gfx9-generic"},
|
||||
{"gfx902", "gfx9-generic"},
|
||||
{"gfx904", "gfx9-generic"},
|
||||
{"gfx906", "gfx9-generic"},
|
||||
{"gfx909", "gfx9-generic"},
|
||||
{"gfx90c", "gfx9-generic"},
|
||||
// "gfx9-4-generic"
|
||||
{"gfx942", "gfx9-4-generic"},
|
||||
{"gfx950", "gfx9-4-generic"},
|
||||
// "gfx10-1-generic"
|
||||
{"gfx1010", "gfx10-1-generic"},
|
||||
{"gfx1011", "gfx10-1-generic"},
|
||||
{"gfx1012", "gfx10-1-generic"},
|
||||
{"gfx1013", "gfx10-1-generic"},
|
||||
// "gfx10-3-generic"
|
||||
{"gfx1030", "gfx10-3-generic"},
|
||||
{"gfx1031", "gfx10-3-generic"},
|
||||
{"gfx1032", "gfx10-3-generic"},
|
||||
{"gfx1033", "gfx10-3-generic"},
|
||||
{"gfx1034", "gfx10-3-generic"},
|
||||
{"gfx1035", "gfx10-3-generic"},
|
||||
{"gfx1036", "gfx10-3-generic"},
|
||||
// "gfx11-generic"
|
||||
{"gfx1100", "gfx11-generic"},
|
||||
{"gfx1101", "gfx11-generic"},
|
||||
{"gfx1102", "gfx11-generic"},
|
||||
{"gfx1103", "gfx11-generic"},
|
||||
{"gfx1150", "gfx11-generic"},
|
||||
{"gfx1151", "gfx11-generic"},
|
||||
// "gfx12-generic"
|
||||
{"gfx1200", "gfx12-generic"},
|
||||
{"gfx1201", "gfx12-generic"},
|
||||
};
|
||||
auto search = genericTargetMap.find(agentTarget);
|
||||
return search != genericTargetMap.end() && coTarget == search->second;
|
||||
}
|
||||
|
||||
// Trim String till character, will be used to get gpuname
|
||||
// example: input is gfx908:sram-ecc+ and trim char is :
|
||||
// input will become :sram-ecc+.
|
||||
@@ -582,7 +542,7 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
// Check for compatibility
|
||||
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
// co_processor is generic target
|
||||
if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor))
|
||||
if (!helpers::IsCompatibleWithGenericTarget(co_processor, agent_isa_processor))
|
||||
return false;
|
||||
} else if (agent_isa_processor != co_processor) {
|
||||
return false;
|
||||
@@ -593,7 +553,24 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
if (co_xnack != ' ') {
|
||||
if (co_xnack != isa_xnack) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CodeObject::QueryGenericTarget(std::string agentTarget, std::string& processor,
|
||||
char& sram_ecc, char& xnack) {
|
||||
static const std::string head = std::string(kAmdgcnTargetTriple) + '-';
|
||||
// Parse agent isa triple target id
|
||||
if (!consume(agentTarget, head)) {
|
||||
return false;
|
||||
}
|
||||
if (!getTargetIDValue(agentTarget, processor, sram_ecc, xnack)) {
|
||||
return false;
|
||||
}
|
||||
if (processor.empty()) return false;
|
||||
auto &map = helpers::GenericTargetMapping();
|
||||
auto search = map.find(processor);
|
||||
if (search == map.end()) return false;
|
||||
processor = head + search->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -667,7 +644,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
bundleEntryId.c_str(), co_triple_target_id.c_str(), genericVersion);
|
||||
|
||||
for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
|
||||
if (code_objs[dev].first) {
|
||||
if (code_objs[dev].first != nullptr) {
|
||||
if (!isGenericTarget(code_objs[dev].first)) {
|
||||
continue; // Specific target already found
|
||||
} else if(genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
@@ -759,14 +736,52 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
|
||||
|
||||
std::set<std::string> devicesSet{}; // To make sure device is unique
|
||||
std::set<std::string> genericDevicesSet{}; // Used to record generic targets
|
||||
|
||||
std::vector<const char*> bundleEntryIDs{};
|
||||
static const std::string hipv4 = kOffloadKindHipv4_; // bundled code objects need the prefix
|
||||
for (size_t i = 0; i < num_devices; i++) {
|
||||
devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
|
||||
}
|
||||
|
||||
for (auto& device : devicesSet) {
|
||||
bundleEntryIDs.push_back(device.c_str());
|
||||
auto res = devicesSet.insert(hipv4 + agent_triple_target_ids[i]);
|
||||
if (res.second) {
|
||||
// This is a new device in devicesSet
|
||||
bundleEntryIDs.push_back(res.first->c_str());
|
||||
std::string processor;
|
||||
char sram_ecc = ' ', xnack = ' ';
|
||||
if (!QueryGenericTarget(agent_triple_target_ids[i], processor, sram_ecc, xnack)) {
|
||||
continue; // No generic target for this device
|
||||
}
|
||||
// Now processor is generic such as
|
||||
// amdgcn-amd-amdhsa--gfx9-4-generic, amdgcn-amd-amdhsa--gfx11-generic
|
||||
processor = hipv4 + processor;
|
||||
auto ret = genericDevicesSet.insert(processor);
|
||||
if (ret.second) {
|
||||
// Without feature
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
if (xnack != ' ') {
|
||||
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
|
||||
if (ret.second) {
|
||||
// Generic target with xnack feature
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
}
|
||||
if (sram_ecc != ' ') {
|
||||
processor += ":sramecc";
|
||||
processor += sram_ecc;
|
||||
ret = genericDevicesSet.insert(processor);
|
||||
if (ret.second) {
|
||||
// Generic target with sramecc feature
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
if (xnack != ' ') {
|
||||
ret = genericDevicesSet.insert(processor + ":xnack" + xnack);
|
||||
if (ret.second) {
|
||||
// Generic target with sramecc and xnack features
|
||||
bundleEntryIDs.push_back(ret.first->c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
@@ -898,6 +913,8 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "Found bundleEntryId=%s", bundleEntryId.c_str());
|
||||
|
||||
// Remove bundleEntryId_
|
||||
if (!consume(bundleEntryId, kOffloadHipV4FatBinName_)) {
|
||||
// This is behavour in comgr unbundling which is subject to change.
|
||||
@@ -908,13 +925,22 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
}
|
||||
trimNameTail(bundleEntryId, '.'); // Remove .fileExtention
|
||||
|
||||
// Currently we only support EF_AMDGPU_GENERIC_VERSION_MIN on generic target
|
||||
uint32_t genericVersion =
|
||||
bundleEntryId.find("generic") != bundleEntryId.npos ? EF_AMDGPU_GENERIC_VERSION_MIN : 0;
|
||||
char* itemData = nullptr;
|
||||
for (size_t dev = 0; dev < num_devices; ++dev) {
|
||||
if (code_objs[dev].first) continue;
|
||||
// LogPrintfError("agent_triple_target_ids[%zu]=%s, bundleEntryId=%s", dev,
|
||||
// agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
|
||||
|
||||
if (bundleEntryId == agent_triple_target_ids[dev]) {
|
||||
if (code_objs[dev].first != nullptr) {
|
||||
if (!isGenericTarget(code_objs[dev].first)) {
|
||||
continue; // Specific target already found
|
||||
} else if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
continue; // Generic target already found, no need to check another generic
|
||||
}
|
||||
}
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR, "agent_triple_target_ids[%zu]=%s, bundleEntryId=%s",
|
||||
dev, agent_triple_target_ids[dev].c_str(), bundleEntryId.c_str());
|
||||
if (isCodeObjectCompatibleWithDevice(bundleEntryId, agent_triple_target_ids[dev],
|
||||
genericVersion)) {
|
||||
if (itemData == nullptr) {
|
||||
itemSize = 0;
|
||||
comgrStatus = amd::Comgr::get_data(item, &itemSize, nullptr);
|
||||
@@ -924,7 +950,6 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
hipStatus = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
|
||||
if (itemSize == 0) {
|
||||
// If there isn't a code object for this device,
|
||||
// amd::Comgr::do_action(AMD_COMGR_ACTION_UNBUNDLE) still returns item with
|
||||
@@ -932,9 +957,8 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
|
||||
"amd::Comgr::get_data() return 0 size for agent_triple_target_ids[%zu]=%s", dev,
|
||||
agent_triple_target_ids[dev].c_str());
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
|
||||
// itemData should be deleted in fatbin's destructor
|
||||
itemData = new char[itemSize];
|
||||
if (itemData == nullptr) {
|
||||
@@ -952,13 +976,29 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (code_objs[dev].first != nullptr) {
|
||||
// This must be data of generic target
|
||||
bool used = false; // Still used by other devices?
|
||||
for (size_t i = 0; i < num_devices; ++i) {
|
||||
if (dev != i && code_objs[dev].first == code_objs[i].first) {
|
||||
used = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!used) {
|
||||
delete[] reinterpret_cast<const char*>(code_objs[dev].first);
|
||||
}
|
||||
} else {
|
||||
--num_code_objs;
|
||||
}
|
||||
code_objs[dev] = std::make_pair(reinterpret_cast<const void*>(itemData), itemSize);
|
||||
--num_code_objs;
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_COMGR,
|
||||
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s), "
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COMGR,
|
||||
"Found agent_triple_target_ids[%zu]=%s: item: Data=%p(%s, %s), "
|
||||
"Size=%zu, num_code_objs=%zu",
|
||||
dev, agent_triple_target_ids[dev].c_str(), itemData,
|
||||
isCompressed ? "compressed" : "uncompressed", itemSize, num_code_objs);
|
||||
isCompressed ? "compressed" : "uncompressed",
|
||||
genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN ? "generic" : "non-generic",
|
||||
itemSize, num_code_objs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user