SWDEV-447973 - Support generic targets
Change-Id: I32db83843e45e0f013591493aafd7a532c881e16
[ROCm/clr commit: f1f4f40c5b]
Este cometimento está contido em:
cometido por
Tao Sang
ascendente
3e8d5599d4
cometimento
f3e3d8178b
@@ -32,7 +32,8 @@ enum {
|
||||
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
|
||||
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
|
||||
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
|
||||
ELFABIVERSION_AMDGPU_HSA_V5 = 3
|
||||
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
|
||||
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
|
||||
};
|
||||
|
||||
// AMDGPU specific e_flags
|
||||
@@ -109,10 +110,21 @@ enum : unsigned {
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X58 = 0x058,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
|
||||
|
||||
// First/last AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC,
|
||||
|
||||
// Indicates if the "xnack" target feature is enabled for all code contained
|
||||
// in the object.
|
||||
@@ -125,7 +137,8 @@ enum : unsigned {
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
|
||||
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
|
||||
// ELFABIVERSION_AMDGPU_HSA_V5 and ELFABIVERSION_AMDGPU_HSA_V6.
|
||||
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
|
||||
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
|
||||
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
|
||||
@@ -133,10 +146,17 @@ enum : unsigned {
|
||||
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
|
||||
|
||||
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
|
||||
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
|
||||
// ELFABIVERSION_AMDGPU_HSA_V5 and ELFABIVERSION_AMDGPU_HSA_V6.
|
||||
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
|
||||
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
|
||||
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
|
||||
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
|
||||
|
||||
// Generic target versioning. This is contained in the most significant byte of EFLAGS.
|
||||
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
|
||||
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
|
||||
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
|
||||
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
|
||||
};
|
||||
|
||||
@@ -97,6 +97,33 @@ bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed)
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int CodeObject::getGenericVersion(const void* image) {
|
||||
const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image);
|
||||
return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA &&
|
||||
ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ?
|
||||
((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) : 0;
|
||||
}
|
||||
|
||||
bool CodeObject::isGenericTarget(const void* image) {
|
||||
return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN;
|
||||
}
|
||||
|
||||
bool CodeObject::containGenericTarget(const void *data) {
|
||||
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
|
||||
const auto* desc = &obheader->desc[0];
|
||||
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize)) {
|
||||
if (desc->size == 0) continue;
|
||||
const void* image =
|
||||
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
|
||||
if (isGenericTarget(image)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the size reported by amd::Elf::getElfSize for the image at `emi`.
uint64_t CodeObject::ElfSize(const void* emi) {
  return amd::Elf::getElfSize(emi);
}
|
||||
|
||||
static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported,
|
||||
@@ -307,6 +334,31 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1201";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx9-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx10-1-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx10-3-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx11-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx12-generic";
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -320,7 +372,7 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
|
||||
if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false;
|
||||
|
||||
bool isXnackSupported{false}, isSramEccSupported{false};
|
||||
|
||||
const char* vstr = nullptr;
|
||||
std::string proc_name;
|
||||
if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false;
|
||||
target_id = std::string(kAmdgcnTargetTriple) + '-' + proc_name;
|
||||
@@ -349,11 +401,14 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
|
||||
}
|
||||
|
||||
case ELFABIVERSION_AMDGPU_HSA_V4:
|
||||
case ELFABIVERSION_AMDGPU_HSA_V5: {
|
||||
case ELFABIVERSION_AMDGPU_HSA_V5:
|
||||
case ELFABIVERSION_AMDGPU_HSA_V6: {
|
||||
if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) {
|
||||
LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str());
|
||||
} else {
|
||||
LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str());
|
||||
vstr = "V4";
|
||||
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V5) {
|
||||
vstr = "V5";
|
||||
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V6) {
|
||||
vstr = "V6";
|
||||
}
|
||||
unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4;
|
||||
if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4)
|
||||
@@ -364,8 +419,10 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
|
||||
unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4;
|
||||
if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4)
|
||||
target_id += ":xnack-";
|
||||
|
||||
else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4)
|
||||
target_id += ":xnack+";
|
||||
LogPrintfInfo("[Code Object %s, target id: %s]", vstr, target_id.c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -387,6 +444,45 @@ static bool consume(std::string& input, std::string consume_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Decides whether a device processor can run a code object built for a
// generic target.
//
//   coTarget    - processor name taken from the code object, e.g. "gfx9-generic".
//   agentTarget - processor name of the device, e.g. "gfx906".
//
// Returns true only when agentTarget appears in the table below and the
// generic family listed for it equals coTarget.
static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) {
  using GenericFamilyTable = std::map<std::string, std::string>;

  // Device processor -> generic family that covers it.
  // The table is subject to change per removal policy.
  static const GenericFamilyTable kGenericFamilyOfAgent{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic"
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };

  const GenericFamilyTable::const_iterator entry = kGenericFamilyOfAgent.find(agentTarget);
  if (entry == kGenericFamilyOfAgent.end()) {
    return false;  // Device is not covered by any generic family.
  }
  return entry->second == coTarget;
}
|
||||
|
||||
// Trim String till character, will be used to get gpuname
|
||||
// example: input is gfx908:sram-ecc+ and trim char is :
|
||||
// input will become :sram-ecc+.
|
||||
@@ -434,12 +530,11 @@ static bool getTargetIDValue(std::string& input, std::string& processor, char& s
|
||||
}
|
||||
|
||||
static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object,
|
||||
std::string& co_triple_target_id) {
|
||||
std::string& co_triple_target_id) {
|
||||
std::string offload_kind = trimName(bundled_co_entry_id, '-');
|
||||
if (offload_kind != kOffloadKindHipv4 && offload_kind != kOffloadKindHip &&
|
||||
offload_kind != kOffloadKindHcc)
|
||||
return false;
|
||||
|
||||
if (offload_kind != kOffloadKindHipv4)
|
||||
return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id);
|
||||
|
||||
@@ -450,7 +545,7 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_
|
||||
}
|
||||
|
||||
static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
std::string agent_triple_target_id) {
|
||||
std::string agent_triple_target_id, unsigned int genericVersion) {
|
||||
// Primitive Check
|
||||
if (co_triple_target_id == agent_triple_target_id) return true;
|
||||
|
||||
@@ -481,7 +576,13 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
if (!agent_triple_target_id.empty()) return false;
|
||||
|
||||
// Check for compatibility
|
||||
if (agent_isa_processor != co_processor) return false;
|
||||
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
// co_processor is generic target
|
||||
if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor))
|
||||
return false;
|
||||
} else if (agent_isa_processor != co_processor) {
|
||||
return false;
|
||||
}
|
||||
if (co_sram_ecc != ' ') {
|
||||
if (co_sram_ecc != isa_sram_ecc) return false;
|
||||
}
|
||||
@@ -496,7 +597,7 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
hipError_t CodeObject::ExtractCodeObjectFromFile(
|
||||
amd::Os::FileDesc fdesc, size_t fsize, const void** image,
|
||||
const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs, size_t foffset) {
|
||||
if (!amd::Os::isValidFileDesc(fdesc)) {
|
||||
return hipErrorFileNotFound;
|
||||
}
|
||||
@@ -504,7 +605,7 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(
|
||||
// Map the file to memory, with offset 0.
|
||||
// file will be unmapped in ModuleUnload
|
||||
// const void* image = nullptr;
|
||||
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) {
|
||||
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, foffset, image)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -528,8 +629,9 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(
|
||||
hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
const void* data, const std::vector<std::string>& agent_triple_target_ids,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs) {
|
||||
std::string magic((const char*)data, kOffloadBundleUncompressedMagicStrSize);
|
||||
if (magic.compare(kOffloadBundleUncompressedMagicStr)) {
|
||||
bool isCompressed = false;
|
||||
if (!IsClangOffloadMagicBundle(data, isCompressed) || isCompressed) {
|
||||
LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false or isCompressed is true", data);
|
||||
return hipErrorInvalidKernelFile;
|
||||
}
|
||||
|
||||
@@ -554,13 +656,23 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
|
||||
std::string co_triple_target_id;
|
||||
unsigned int genericVersion = getGenericVersion(image);
|
||||
if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue;
|
||||
LogPrintfInfo("bundleEntryId=%s, co_triple_target_id=%s, genericVersion=%d\n", bundleEntryId.c_str(),
|
||||
co_triple_target_id.c_str(), genericVersion);
|
||||
|
||||
for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
|
||||
if (code_objs[dev].first) continue;
|
||||
if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) {
|
||||
if (code_objs[dev].first) {
|
||||
// Specific target already matched, skipped.
|
||||
// But for generic target, we will continue searching for matched specific target.
|
||||
if (!isGenericTarget(code_objs[dev].first)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev],
|
||||
genericVersion)) {
|
||||
if (code_objs[dev].first == nullptr) --num_code_objs;
|
||||
code_objs[dev] = std::make_pair(image, image_size);
|
||||
--num_code_objs;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -879,7 +991,6 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
|
||||
|
||||
std::string co_triple_target_id;
|
||||
bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id);
|
||||
|
||||
if (valid_co) {
|
||||
LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(),
|
||||
co_triple_target_id.c_str());
|
||||
|
||||
@@ -52,7 +52,7 @@ class CodeObject {
|
||||
// return code_objs{binary_ptr, binary_size}, which could be used to determine foffset
|
||||
static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize,
|
||||
const void ** image, const std::vector<std::string>& device_names,
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs);
|
||||
std::vector<std::pair<const void*, size_t>>& code_objs, size_t foffset);
|
||||
|
||||
// Given an ptr to memory, extracts to code object for corresponding devices,
|
||||
// returns code_objs{binary_ptr, binary_size} and uniform resource indicator
|
||||
@@ -65,6 +65,12 @@ class CodeObject {
|
||||
|
||||
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
|
||||
|
||||
static unsigned int getGenericVersion(const void* image);
|
||||
|
||||
static bool isGenericTarget(const void* image);
|
||||
|
||||
static bool containGenericTarget(const void *data);
|
||||
|
||||
// Return size of fat bin
|
||||
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
|
||||
|
||||
|
||||
@@ -118,7 +118,8 @@ void ListAllDeviceWithNoCOFromBundle(const std::unordered_map<std::string,
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices) {
|
||||
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
|
||||
bool &containGenericTarget) {
|
||||
amd_comgr_data_t data_object {0};
|
||||
amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
@@ -187,6 +188,13 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!isCompressed) {
|
||||
if (CodeObject::containGenericTarget(image_)) {
|
||||
LogInfo("offload bundle contains generic target code object");
|
||||
containGenericTarget = true;
|
||||
return hipErrorNoBinaryForGpu; // This path doesn't support generic target
|
||||
}
|
||||
}
|
||||
if (isCompressed || HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) {
|
||||
size_t major = 0, minor = 0;
|
||||
amd::Comgr::get_version(&major, &minor);
|
||||
@@ -294,7 +302,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
// Clean up file and memory resources if hip_status failed for some reason.
|
||||
if (hip_status != hipSuccess && hip_status != hipErrorInvalidKernelFile) {
|
||||
if (image_mapped_) {
|
||||
if (!amd::Os::MemoryUnmapFile(image_, fsize_))
|
||||
if (!amd::Os::MemoryUnmapFile(image_, ufd_->fsize_))
|
||||
guarantee(false, "Cannot unmap the file");
|
||||
|
||||
image_ = nullptr;
|
||||
@@ -323,9 +331,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
|
||||
|
||||
hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devices) {
|
||||
if (!HIP_USE_RUNTIME_UNBUNDLER) {
|
||||
return ExtractFatBinaryUsingCOMGR(devices);
|
||||
bool containGenericTarget = false;
|
||||
hipError_t status = ExtractFatBinaryUsingCOMGR(devices, containGenericTarget);
|
||||
if (!containGenericTarget) return status;
|
||||
}
|
||||
|
||||
hipError_t hip_error = hipSuccess;
|
||||
std::vector<std::pair<const void*, size_t>> code_objs;
|
||||
|
||||
@@ -335,9 +344,12 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devi
|
||||
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
|
||||
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
|
||||
}
|
||||
|
||||
// We are given file name, get the file desc and file size
|
||||
if (fname_.size() > 0) {
|
||||
if (image_ != nullptr) {
|
||||
// We are given the image pointer directly, try to extract file desc & file size
|
||||
hip_error = CodeObject::ExtractCodeObjectFromMemory(image_,
|
||||
device_names, code_objs, uri_);
|
||||
} else if (fname_.size() > 0) {
|
||||
// We are given file name, get the file desc and file size
|
||||
// Get File Handle & size of the file.
|
||||
if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) {
|
||||
return hipErrorFileNotFound;
|
||||
@@ -348,12 +360,7 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devi
|
||||
|
||||
// Extract the code object from file
|
||||
hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_,
|
||||
device_names, code_objs);
|
||||
|
||||
} else if (image_ != nullptr) {
|
||||
// We are directly given image pointer directly, try to extract file desc & file Size
|
||||
hip_error = CodeObject::ExtractCodeObjectFromMemory(image_,
|
||||
device_names, code_objs, uri_);
|
||||
device_names, code_objs, foffset_);
|
||||
} else {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -64,7 +64,8 @@ public:
|
||||
~FatBinaryInfo();
|
||||
|
||||
// Loads Fat binary from file or image, unbundles COs for devices.
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
|
||||
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
|
||||
bool &containGenericTarget);
|
||||
|
||||
/**
|
||||
* @brief Extract code object from fatbin using comgr unbundling action via calling
|
||||
|
||||
@@ -266,6 +266,30 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx1201";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx9-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
|
||||
xnackSupported = true;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx10-1-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx10-3-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx11-generic";
|
||||
break;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:
|
||||
xnackSupported = false;
|
||||
sramEccSupported = false;
|
||||
proc_name = "gfx12-generic";
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
@@ -309,12 +333,16 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
|
||||
}
|
||||
|
||||
case ELFABIVERSION_AMDGPU_HSA_V4:
|
||||
case ELFABIVERSION_AMDGPU_HSA_V5: {
|
||||
case ELFABIVERSION_AMDGPU_HSA_V5:
|
||||
case ELFABIVERSION_AMDGPU_HSA_V6: {
|
||||
if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) {
|
||||
LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str());
|
||||
} else {
|
||||
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V5) {
|
||||
LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str());
|
||||
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V6) {
|
||||
LogPrintfInfo("[Code Object V6, target id:%s]", target_id.c_str());
|
||||
}
|
||||
|
||||
unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4;
|
||||
if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4)
|
||||
target_id += ":sramecc-";
|
||||
@@ -347,6 +375,42 @@ static bool consume(std::string& input, std::string consume_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Is the agent target compatible with a generic code object target?
//
//   coTarget    - processor name taken from the code object, e.g. "gfx11-generic".
//   agentTarget - processor name of the device, e.g. "gfx1100".
//
// Returns true only when agentTarget is listed in the table below and the
// generic family recorded for it equals coTarget.
static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) {
  // Device processor -> generic family that covers it.
  // The map is subject to change per removal policy.
  static const std::map<std::string, std::string> genericTargetMap{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic": the "gfx12-generic" processor name is recognised when
      // decoding code objects, so its member devices must be listed here too.
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };

  const auto search = genericTargetMap.find(agentTarget);
  return search != genericTargetMap.end() && coTarget == search->second;
}
|
||||
|
||||
// Trim String till character, will be used to get gpuname
|
||||
// example: input is gfx908:sram-ecc+ and trim char is :
|
||||
// input will become sram-ecc+.
|
||||
@@ -382,7 +446,7 @@ static bool getTargetIDValue(std::string& input, std::string& processor, char& s
|
||||
}
|
||||
|
||||
static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object,
|
||||
std::string& co_triple_target_id) {
|
||||
std::string& co_triple_target_id) {
|
||||
std::string offload_kind = trimName(bundled_co_entry_id, '-');
|
||||
if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP &&
|
||||
offload_kind != OFFLOAD_KIND_HCC)
|
||||
@@ -398,7 +462,7 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_
|
||||
}
|
||||
|
||||
bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
std::string agent_triple_target_id) {
|
||||
std::string agent_triple_target_id, unsigned& genericVersion) {
|
||||
// Primitive Check
|
||||
if (co_triple_target_id == agent_triple_target_id) return true;
|
||||
|
||||
@@ -430,7 +494,14 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
if (!agent_triple_target_id.empty()) return false;
|
||||
|
||||
// Check for compatibility
|
||||
if (agent_isa_processor != co_processor) return false;
|
||||
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
|
||||
// co_processor is generic target
|
||||
if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor))
|
||||
return false;
|
||||
} else if (agent_isa_processor != co_processor) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (co_sram_ecc != ' ') {
|
||||
if (co_sram_ecc != isa_sram_ecc) return false;
|
||||
}
|
||||
@@ -441,6 +512,17 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline unsigned int getGenericVersion(const void* image) {
|
||||
const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image);
|
||||
return ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6
|
||||
? ((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET)
|
||||
: 0;
|
||||
}
|
||||
|
||||
static inline bool isGenericTarget(const void* image) {
|
||||
return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN;
|
||||
}
|
||||
|
||||
bool UnbundleBitCode(const std::vector<char>& bundled_llvm_bitcode, const std::string& isa,
|
||||
size_t& co_offset, size_t& co_size) {
|
||||
std::string magic(bundled_llvm_bitcode.begin(),
|
||||
@@ -464,8 +546,10 @@ bool UnbundleBitCode(const std::vector<char>& bundled_llvm_bitcode, const std::s
|
||||
const size_t image_size = desc->size;
|
||||
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
|
||||
|
||||
// Need call getTripleTargetID(...).
|
||||
// Check if the device id and code object id are compatible
|
||||
if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa)) {
|
||||
unsigned genericVersion = getGenericVersion(image);
|
||||
if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa, genericVersion)) {
|
||||
co_offset = (reinterpret_cast<uintptr_t>(image) - reinterpret_cast<uintptr_t>(data));
|
||||
co_size = image_size;
|
||||
break;
|
||||
|
||||
@@ -137,6 +137,9 @@ void OCLOfflineCompilation::open(unsigned int test, char* units,
|
||||
char strVersion[128];
|
||||
_wrapper->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name), name,
|
||||
NULL);
|
||||
if (strstr(name, "-generic") != NULL) {
|
||||
continue; // Skip generic target because it needs code object version 6
|
||||
}
|
||||
error_ = _wrapper->clGetDeviceInfo(devices[i], CL_DEVICE_VERSION,
|
||||
sizeof(strVersion), strVersion, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
@@ -200,6 +200,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
|
||||
{"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
@@ -212,6 +213,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
|
||||
{"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
@@ -219,6 +221,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
|
||||
{"gfx1034", "gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1035", "gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1036", "gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx10-3-generic", nullptr, true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1100", "gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1101", "gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1102", "gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
@@ -226,8 +229,10 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
|
||||
{"gfx1150", "gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1151", "gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1152", "gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx11-generic", nullptr, true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1200", "gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1201", "gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx12-generic", nullptr, true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
};
|
||||
return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_));
|
||||
}
|
||||
@@ -242,13 +247,30 @@ std::string Isa::isaName() const {
|
||||
}
|
||||
|
||||
bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) {
|
||||
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
|
||||
codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
|
||||
codeObjectIsa.versionStepping() != agentIsa.versionStepping())
|
||||
return false;
|
||||
|
||||
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
|
||||
agentIsa.sramecc() != Feature::Any);
|
||||
bool isGeneric = std::strstr(codeObjectIsa.targetId(), "generic") != nullptr;
|
||||
if (isGeneric) {
|
||||
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
|
||||
codeObjectIsa.versionMinor() > agentIsa.versionMinor() ||
|
||||
(codeObjectIsa.versionMinor() == agentIsa.versionMinor() &&
|
||||
codeObjectIsa.versionStepping() > agentIsa.versionStepping()))
|
||||
return false;
|
||||
if (std::strstr(agentIsa.targetId(), "gfx906") != nullptr) {
|
||||
// For the generic target of gfx906, codeObjectIsa.isSrameccSupported() == false while
|
||||
// agentIsa.isSrameccSupported() = true
|
||||
assert(agentIsa.sramecc() != Feature::Any);
|
||||
}
|
||||
else {
|
||||
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
|
||||
agentIsa.sramecc() != Feature::Any);
|
||||
}
|
||||
} else {
|
||||
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
|
||||
codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
|
||||
codeObjectIsa.versionStepping() != agentIsa.versionStepping())
|
||||
return false;
|
||||
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
|
||||
agentIsa.sramecc() != Feature::Any);
|
||||
}
|
||||
if ((codeObjectIsa.sramecc() == Feature::Enabled ||
|
||||
codeObjectIsa.sramecc() == Feature::Disabled) &&
|
||||
codeObjectIsa.sramecc() != agentIsa.sramecc())
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador