2
0

SWDEV-447973 - Support generic targets

Change-Id: I32db83843e45e0f013591493aafd7a532c881e16


[ROCm/clr commit: f1f4f40c5b]
Este cometimento está contido em:
taosang2
2024-03-08 13:31:08 -05:00
cometido por Tao Sang
ascendente 3e8d5599d4
cometimento f3e3d8178b
8 ficheiros modificados com 303 adições e 49 eliminações
+24 -4
Ver ficheiro
@@ -32,7 +32,8 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
ELFABIVERSION_AMDGPU_HSA_V5 = 3
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
// AMDGPU specific e_flags
@@ -109,10 +110,21 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X58 = 0x058,
EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
@@ -125,7 +137,8 @@ enum : unsigned {
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3.
EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200,
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
// ELFABIVERSION_AMDGPU_HSA_V5 and ELFABIVERSION_AMDGPU_HSA_V6.
EF_AMDGPU_FEATURE_XNACK_V4 = 0x300,
EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000,
EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100,
@@ -133,10 +146,17 @@ enum : unsigned {
EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300,
// SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values.
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4.
// Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4,
// ELFABIVERSION_AMDGPU_HSA_V5 and ELFABIVERSION_AMDGPU_HSA_V6.
EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00,
EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000,
EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400,
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
// Generic target versioning. This is contained in the most significant byte (bits 31:24) of EFLAGS.
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
};
+128 -17
Ver ficheiro
@@ -97,6 +97,33 @@ bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed)
return false;
}
unsigned int CodeObject::getGenericVersion(const void* image) {
const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image);
return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA &&
ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ?
((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) : 0;
}
// Tells whether the code object at `image` was built for a generic target, i.e.
// whether its generic version (see getGenericVersion) is at least
// EF_AMDGPU_GENERIC_VERSION_MIN.
bool CodeObject::isGenericTarget(const void* image) {
  const unsigned int version = getGenericVersion(image);
  return version >= EF_AMDGPU_GENERIC_VERSION_MIN;
}
bool CodeObject::containGenericTarget(const void *data) {
const auto obheader = reinterpret_cast<const __ClangOffloadBundleUncompressedHeader*>(data);
const auto* desc = &obheader->desc[0];
for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleInfo*>(
reinterpret_cast<uintptr_t>(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize)) {
if (desc->size == 0) continue;
const void* image =
reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
if (isGenericTarget(image)) {
return true;
}
}
return false;
}
// Forwards to amd::Elf::getElfSize() and returns its result for the ELF at `emi`.
uint64_t CodeObject::ElfSize(const void* emi) {
  const uint64_t elfSize = amd::Elf::getElfSize(emi);
  return elfSize;
}
static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported,
@@ -307,6 +334,31 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp
sramEccSupported = false;
proc_name = "gfx1201";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx9-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx10-1-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx10-3-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx11-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx12-generic";
break;
default:
return false;
}
@@ -320,7 +372,7 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false;
bool isXnackSupported{false}, isSramEccSupported{false};
const char* vstr = nullptr;
std::string proc_name;
if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false;
target_id = std::string(kAmdgcnTargetTriple) + '-' + proc_name;
@@ -349,11 +401,14 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
}
case ELFABIVERSION_AMDGPU_HSA_V4:
case ELFABIVERSION_AMDGPU_HSA_V5: {
case ELFABIVERSION_AMDGPU_HSA_V5:
case ELFABIVERSION_AMDGPU_HSA_V6: {
if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) {
LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str());
} else {
LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str());
vstr = "V4";
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V5) {
vstr = "V5";
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V6) {
vstr = "V6";
}
unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4;
if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4)
@@ -364,8 +419,10 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4;
if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4)
target_id += ":xnack-";
else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4)
target_id += ":xnack+";
LogPrintfInfo("[Code Object %s, target id: %s]", vstr, target_id.c_str());
break;
}
@@ -387,6 +444,45 @@ static bool consume(std::string& input, std::string consume_) {
return true;
}
// Decides whether an agent (device) processor can run a generic code object.
//   coTarget    - processor name of the code object, e.g. "gfx9-generic"
//   agentTarget - processor name of the agent, e.g. "gfx906"
// Returns true only when agentTarget appears in the table below and the generic
// target it maps to equals coTarget.
static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) {
  // Specific processor -> generic target covering it.
  // The set of covered processors is subject to change over time.
  static std::map<std::string, std::string> genericTargetMap{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic"
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };

  const auto entry = genericTargetMap.find(agentTarget);
  if (entry == genericTargetMap.end()) {
    return false;  // The agent is not covered by any generic target.
  }
  return entry->second == coTarget;
}
// Trim String till character, will be used to get gpuname
// example: input is gfx908:sram-ecc+ and trim char is :
// input will become :sram-ecc+.
@@ -434,12 +530,11 @@ static bool getTargetIDValue(std::string& input, std::string& processor, char& s
}
static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object,
std::string& co_triple_target_id) {
std::string& co_triple_target_id) {
std::string offload_kind = trimName(bundled_co_entry_id, '-');
if (offload_kind != kOffloadKindHipv4 && offload_kind != kOffloadKindHip &&
offload_kind != kOffloadKindHcc)
return false;
if (offload_kind != kOffloadKindHipv4)
return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id);
@@ -450,7 +545,7 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_
}
static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
std::string agent_triple_target_id) {
std::string agent_triple_target_id, unsigned int genericVersion) {
// Primitive Check
if (co_triple_target_id == agent_triple_target_id) return true;
@@ -481,7 +576,13 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
if (!agent_triple_target_id.empty()) return false;
// Check for compatibility
if (agent_isa_processor != co_processor) return false;
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
// co_processor is generic target
if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor))
return false;
} else if (agent_isa_processor != co_processor) {
return false;
}
if (co_sram_ecc != ' ') {
if (co_sram_ecc != isa_sram_ecc) return false;
}
@@ -496,7 +597,7 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
hipError_t CodeObject::ExtractCodeObjectFromFile(
amd::Os::FileDesc fdesc, size_t fsize, const void** image,
const std::vector<std::string>& device_names,
std::vector<std::pair<const void*, size_t>>& code_objs) {
std::vector<std::pair<const void*, size_t>>& code_objs, size_t foffset) {
if (!amd::Os::isValidFileDesc(fdesc)) {
return hipErrorFileNotFound;
}
@@ -504,7 +605,7 @@ hipError_t CodeObject::ExtractCodeObjectFromFile(
// Map the file to memory, starting at the given file offset (foffset).
// file will be unmapped in ModuleUnload
// const void* image = nullptr;
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) {
if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, foffset, image)) {
return hipErrorInvalidValue;
}
@@ -528,8 +629,9 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory(
hipError_t CodeObject::extractCodeObjectFromFatBinary(
const void* data, const std::vector<std::string>& agent_triple_target_ids,
std::vector<std::pair<const void*, size_t>>& code_objs) {
std::string magic((const char*)data, kOffloadBundleUncompressedMagicStrSize);
if (magic.compare(kOffloadBundleUncompressedMagicStr)) {
bool isCompressed = false;
if (!IsClangOffloadMagicBundle(data, isCompressed) || isCompressed) {
LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false or isCompressed is true", data);
return hipErrorInvalidKernelFile;
}
@@ -554,13 +656,23 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
std::string co_triple_target_id;
unsigned int genericVersion = getGenericVersion(image);
if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue;
LogPrintfInfo("bundleEntryId=%s, co_triple_target_id=%s, genericVersion=%d\n", bundleEntryId.c_str(),
co_triple_target_id.c_str(), genericVersion);
for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
if (code_objs[dev].first) continue;
if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) {
if (code_objs[dev].first) {
// Specific target already matched, skipped.
// But for generic target, we will continue searching for matched specific target.
if (!isGenericTarget(code_objs[dev].first)) {
continue;
}
}
if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev],
genericVersion)) {
if (code_objs[dev].first == nullptr) --num_code_objs;
code_objs[dev] = std::make_pair(image, image_size);
--num_code_objs;
}
}
}
@@ -879,7 +991,6 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr(
std::string co_triple_target_id;
bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id);
if (valid_co) {
LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(),
co_triple_target_id.c_str());
+7 -1
Ver ficheiro
@@ -52,7 +52,7 @@ class CodeObject {
// return code_objs{binary_ptr, binary_size}, which could be used to determine foffset
static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize,
const void ** image, const std::vector<std::string>& device_names,
std::vector<std::pair<const void*, size_t>>& code_objs);
std::vector<std::pair<const void*, size_t>>& code_objs, size_t foffset);
// Given a pointer to memory, extracts the code objects for the corresponding devices,
// returns code_objs{binary_ptr, binary_size} and uniform resource indicator
@@ -65,6 +65,12 @@ class CodeObject {
static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed);
static unsigned int getGenericVersion(const void* image);
static bool isGenericTarget(const void* image);
static bool containGenericTarget(const void *data);
// Return size of fat bin
static size_t getFatbinSize(const void* data, const bool isCompressed = false);
+20 -13
Ver ficheiro
@@ -118,7 +118,8 @@ void ListAllDeviceWithNoCOFromBundle(const std::unordered_map<std::string,
}
}
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices) {
hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
bool &containGenericTarget) {
amd_comgr_data_t data_object {0};
amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS;
hipError_t hip_status = hipSuccess;
@@ -187,6 +188,13 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
}
break;
}
if (!isCompressed) {
if (CodeObject::containGenericTarget(image_)) {
LogInfo("offload bundle contains generic target code object");
containGenericTarget = true;
return hipErrorNoBinaryForGpu; // This path doesn't support generic target
}
}
if (isCompressed || HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION) {
size_t major = 0, minor = 0;
amd::Comgr::get_version(&major, &minor);
@@ -294,7 +302,7 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
// Clean up file and memory resources if hip_status failed for some reason.
if (hip_status != hipSuccess && hip_status != hipErrorInvalidKernelFile) {
if (image_mapped_) {
if (!amd::Os::MemoryUnmapFile(image_, fsize_))
if (!amd::Os::MemoryUnmapFile(image_, ufd_->fsize_))
guarantee(false, "Cannot unmap the file");
image_ = nullptr;
@@ -323,9 +331,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devices) {
if (!HIP_USE_RUNTIME_UNBUNDLER) {
return ExtractFatBinaryUsingCOMGR(devices);
bool containGenericTarget = false;
hipError_t status = ExtractFatBinaryUsingCOMGR(devices, containGenericTarget);
if (!containGenericTarget) return status;
}
hipError_t hip_error = hipSuccess;
std::vector<std::pair<const void*, size_t>> code_objs;
@@ -335,9 +344,12 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devi
for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) {
device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName());
}
// We are given file name, get the file desc and file size
if (fname_.size() > 0) {
if (image_ != nullptr) {
// We are given the image pointer directly, try to extract file desc & file Size
hip_error = CodeObject::ExtractCodeObjectFromMemory(image_,
device_names, code_objs, uri_);
} else if (fname_.size() > 0) {
// We are given file name, get the file desc and file size
// Get File Handle & size of the file.
if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) {
return hipErrorFileNotFound;
@@ -348,12 +360,7 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector<hip::Device*>& devi
// Extract the code object from file
hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_,
device_names, code_objs);
} else if (image_ != nullptr) {
// We are directly given image pointer directly, try to extract file desc & file Size
hip_error = CodeObject::ExtractCodeObjectFromMemory(image_,
device_names, code_objs, uri_);
device_names, code_objs, foffset_);
} else {
return hipErrorInvalidValue;
}
+2 -1
Ver ficheiro
@@ -64,7 +64,8 @@ public:
~FatBinaryInfo();
// Loads Fat binary from file or image, unbundles COs for devices.
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices);
hipError_t ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices,
bool &containGenericTarget);
/**
* @brief Extract code object from fatbin using comgr unbundling action via calling
+90 -6
Ver ficheiro
@@ -266,6 +266,30 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx1201";
case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx9-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
xnackSupported = true;
sramEccSupported = false;
proc_name = "gfx10-1-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx10-3-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx11-generic";
break;
case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC:
xnackSupported = false;
sramEccSupported = false;
proc_name = "gfx12-generic";
break;
default:
return false;
@@ -309,12 +333,16 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string
}
case ELFABIVERSION_AMDGPU_HSA_V4:
case ELFABIVERSION_AMDGPU_HSA_V5: {
case ELFABIVERSION_AMDGPU_HSA_V5:
case ELFABIVERSION_AMDGPU_HSA_V6: {
if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) {
LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str());
} else {
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V5) {
LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str());
} else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V6) {
LogPrintfInfo("[Code Object V6, target id:%s]", target_id.c_str());
}
unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4;
if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4)
target_id += ":sramecc-";
@@ -347,6 +375,42 @@ static bool consume(std::string& input, std::string consume_) {
return true;
}
// Decides whether an agent (device) processor can run a generic code object.
//   coTarget    - processor name of the code object, e.g. "gfx9-generic"
//   agentTarget - processor name of the agent, e.g. "gfx906"
// Returns true only when agentTarget appears in the table below and the generic
// target it maps to equals coTarget.
static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) {
  // Specific processor -> generic target covering it.
  // The set of covered processors is subject to change over time.
  // The table is never modified after construction, hence const.
  static const std::map<std::string, std::string> genericTargetMap{
      // "gfx9-generic"
      {"gfx900", "gfx9-generic"},
      {"gfx902", "gfx9-generic"},
      {"gfx904", "gfx9-generic"},
      {"gfx906", "gfx9-generic"},
      {"gfx909", "gfx9-generic"},
      {"gfx90c", "gfx9-generic"},
      // "gfx10-1-generic"
      {"gfx1010", "gfx10-1-generic"},
      {"gfx1011", "gfx10-1-generic"},
      {"gfx1012", "gfx10-1-generic"},
      {"gfx1013", "gfx10-1-generic"},
      // "gfx10-3-generic"
      {"gfx1030", "gfx10-3-generic"},
      {"gfx1031", "gfx10-3-generic"},
      {"gfx1032", "gfx10-3-generic"},
      {"gfx1033", "gfx10-3-generic"},
      {"gfx1034", "gfx10-3-generic"},
      {"gfx1035", "gfx10-3-generic"},
      {"gfx1036", "gfx10-3-generic"},
      // "gfx11-generic"
      {"gfx1100", "gfx11-generic"},
      {"gfx1101", "gfx11-generic"},
      {"gfx1102", "gfx11-generic"},
      {"gfx1103", "gfx11-generic"},
      {"gfx1150", "gfx11-generic"},
      {"gfx1151", "gfx11-generic"},
      // "gfx12-generic": getProcName() in this file reports this name for
      // EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC, so its agents must be listed too.
      {"gfx1200", "gfx12-generic"},
      {"gfx1201", "gfx12-generic"},
  };

  const auto entry = genericTargetMap.find(agentTarget);
  if (entry == genericTargetMap.end()) {
    return false;  // The agent is not covered by any generic target.
  }
  return entry->second == coTarget;
}
// Trim String till character, will be used to get gpuname
// example: input is gfx908:sram-ecc+ and trim char is :
// input will become sram-ecc+.
@@ -382,7 +446,7 @@ static bool getTargetIDValue(std::string& input, std::string& processor, char& s
}
static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object,
std::string& co_triple_target_id) {
std::string& co_triple_target_id) {
std::string offload_kind = trimName(bundled_co_entry_id, '-');
if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP &&
offload_kind != OFFLOAD_KIND_HCC)
@@ -398,7 +462,7 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_
}
bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
std::string agent_triple_target_id) {
std::string agent_triple_target_id, unsigned& genericVersion) {
// Primitive Check
if (co_triple_target_id == agent_triple_target_id) return true;
@@ -430,7 +494,14 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
if (!agent_triple_target_id.empty()) return false;
// Check for compatibility
if (agent_isa_processor != co_processor) return false;
if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) {
// co_processor is generic target
if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor))
return false;
} else if (agent_isa_processor != co_processor) {
return false;
}
if (co_sram_ecc != ' ') {
if (co_sram_ecc != isa_sram_ecc) return false;
}
@@ -441,6 +512,17 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
return true;
}
static inline unsigned int getGenericVersion(const void* image) {
const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image);
return ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6
? ((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET)
: 0;
}
// Tells whether the image was built for a generic target, i.e. whether its
// generic version (see getGenericVersion) is at least EF_AMDGPU_GENERIC_VERSION_MIN.
static inline bool isGenericTarget(const void* image) {
  const unsigned int version = getGenericVersion(image);
  return version >= EF_AMDGPU_GENERIC_VERSION_MIN;
}
bool UnbundleBitCode(const std::vector<char>& bundled_llvm_bitcode, const std::string& isa,
size_t& co_offset, size_t& co_size) {
std::string magic(bundled_llvm_bitcode.begin(),
@@ -464,8 +546,10 @@ bool UnbundleBitCode(const std::vector<char>& bundled_llvm_bitcode, const std::s
const size_t image_size = desc->size;
std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
// Need call getTripleTargetID(...).
// Check if the device id and code object id are compatible
if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa)) {
unsigned genericVersion = getGenericVersion(image);
if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa, genericVersion)) {
co_offset = (reinterpret_cast<uintptr_t>(image) - reinterpret_cast<uintptr_t>(data));
co_size = image_size;
break;
@@ -137,6 +137,9 @@ void OCLOfflineCompilation::open(unsigned int test, char* units,
char strVersion[128];
_wrapper->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name), name,
NULL);
if (strstr(name, "-generic") != NULL) {
continue; // Skip generic target because it needs code object version 6
}
error_ = _wrapper->clGetDeviceInfo(devices[i], CL_DEVICE_VERSION,
sizeof(strVersion), strVersion, 0);
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
+29 -7
Ver ficheiro
@@ -200,6 +200,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
{"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
@@ -212,6 +213,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
@@ -219,6 +221,7 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx1034", "gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1035", "gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1036", "gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx10-3-generic", nullptr, true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1100", "gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1101", "gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1102", "gfx1102", true, true, 11, 0, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
@@ -226,8 +229,10 @@ std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
{"gfx1150", "gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1151", "gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1152", "gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx11-generic", nullptr, true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1200", "gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx1201", "gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
{"gfx12-generic", nullptr, true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
};
return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_));
}
@@ -242,13 +247,30 @@ std::string Isa::isaName() const {
}
bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) {
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
codeObjectIsa.versionStepping() != agentIsa.versionStepping())
return false;
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
agentIsa.sramecc() != Feature::Any);
bool isGeneric = std::strstr(codeObjectIsa.targetId(), "generic") != nullptr;
if (isGeneric) {
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
codeObjectIsa.versionMinor() > agentIsa.versionMinor() ||
(codeObjectIsa.versionMinor() == agentIsa.versionMinor() &&
codeObjectIsa.versionStepping() > agentIsa.versionStepping()))
return false;
if (std::strstr(agentIsa.targetId(), "gfx906") != nullptr) {
// For the generic target of gfx906, codeObjectIsa.isSrameccSupported() == false while
// agentIsa.isSrameccSupported() = true
assert(agentIsa.sramecc() != Feature::Any);
}
else {
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
agentIsa.sramecc() != Feature::Any);
}
} else {
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
codeObjectIsa.versionStepping() != agentIsa.versionStepping())
return false;
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
agentIsa.sramecc() != Feature::Any);
}
if ((codeObjectIsa.sramecc() == Feature::Enabled ||
codeObjectIsa.sramecc() == Feature::Disabled) &&
codeObjectIsa.sramecc() != agentIsa.sramecc())