From f3e3d8178be4547e2c32bde918fb46cee26c1068 Mon Sep 17 00:00:00 2001 From: taosang2 Date: Fri, 8 Mar 2024 13:31:08 -0500 Subject: [PATCH] SWDEV-447973 - Support generic targets Change-Id: I32db83843e45e0f013591493aafd7a532c881e16 [ROCm/clr commit: f1f4f40c5b9c20b708ec51b032064f4f17471a35] --- projects/clr/hipamd/src/amd_hsa_elf.hpp | 28 +++- projects/clr/hipamd/src/hip_code_object.cpp | 145 ++++++++++++++++-- projects/clr/hipamd/src/hip_code_object.hpp | 8 +- projects/clr/hipamd/src/hip_fatbin.cpp | 33 ++-- projects/clr/hipamd/src/hip_fatbin.hpp | 3 +- .../hipamd/src/hiprtc/hiprtcComgrHelper.cpp | 96 +++++++++++- .../module/runtime/OCLOfflineCompilation.cpp | 3 + projects/clr/rocclr/device/device.cpp | 36 ++++- 8 files changed, 303 insertions(+), 49 deletions(-) diff --git a/projects/clr/hipamd/src/amd_hsa_elf.hpp b/projects/clr/hipamd/src/amd_hsa_elf.hpp index 58a42eb488..f150d21d60 100644 --- a/projects/clr/hipamd/src/amd_hsa_elf.hpp +++ b/projects/clr/hipamd/src/amd_hsa_elf.hpp @@ -32,7 +32,8 @@ enum { ELFABIVERSION_AMDGPU_HSA_V2 = 0, ELFABIVERSION_AMDGPU_HSA_V3 = 1, ELFABIVERSION_AMDGPU_HSA_V4 = 2, - ELFABIVERSION_AMDGPU_HSA_V5 = 3 + ELFABIVERSION_AMDGPU_HSA_V5 = 3, + ELFABIVERSION_AMDGPU_HSA_V6 = 4, }; // AMDGPU specific e_flags @@ -109,10 +110,21 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d, EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050, + EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051, + EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052, + EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053, + EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054, + EF_AMDGPU_MACH_AMDGCN_GFX1152 = 0x055, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X56 = 0x056, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X58 = 0x058, + EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059, // First/last AMDGCN-based processors. 
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. @@ -125,7 +137,8 @@ enum : unsigned { // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V3. EF_AMDGPU_FEATURE_SRAMECC_V3 = 0x200, - // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4. + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4, + // ELFABIVERSION_AMDGPU_HSA_V5 and ELFABIVERSION_AMDGPU_HSA_V6. EF_AMDGPU_FEATURE_XNACK_V4 = 0x300, EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 = 0x000, EF_AMDGPU_FEATURE_XNACK_ANY_V4 = 0x100, @@ -133,10 +146,17 @@ enum : unsigned { EF_AMDGPU_FEATURE_XNACK_ON_V4 = 0x300, // SRAMECC selection mask for EF_AMDGPU_FEATURE_SRAMECC_* values. - // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4. + // Only valid for ELFOSABI_AMDGPU_HSA and ELFABIVERSION_AMDGPU_HSA_V4, + // ELFABIVERSION_AMDGPU_HSA_V5 and ELFABIVERSION_AMDGPU_HSA_V6. EF_AMDGPU_FEATURE_SRAMECC_V4 = 0xc00, EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 = 0x000, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 = 0x400, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800, EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00, + + // Generic target versioning. This is contained in the most significant byte of EFLAGS. 
+ EF_AMDGPU_GENERIC_VERSION = 0xff000000, + EF_AMDGPU_GENERIC_VERSION_OFFSET = 24, + EF_AMDGPU_GENERIC_VERSION_MIN = 1, + EF_AMDGPU_GENERIC_VERSION_MAX = 0xff, }; diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp index 37d8b0d6b0..557967117a 100644 --- a/projects/clr/hipamd/src/hip_code_object.cpp +++ b/projects/clr/hipamd/src/hip_code_object.cpp @@ -97,6 +97,33 @@ bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) return false; } +unsigned int CodeObject::getGenericVersion(const void* image) { + const Elf64_Ehdr* ehdr = reinterpret_cast(image); + return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA && + ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ? + ((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) : 0; +} + +bool CodeObject::isGenericTarget(const void* image) { + return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN; +} + +bool CodeObject::containGenericTarget(const void *data) { + const auto obheader = reinterpret_cast(data); + const auto* desc = &obheader->desc[0]; + for (uint64_t i = 0; i < obheader->numOfCodeObjects; ++i, + desc = reinterpret_cast( + reinterpret_cast(&desc->bundleEntryId[0]) + desc->bundleEntryIdSize)) { + if (desc->size == 0) continue; + const void* image = + reinterpret_cast(reinterpret_cast(obheader) + desc->offset); + if (isGenericTarget(image)) { + return true; + } + } + return false; +} + uint64_t CodeObject::ElfSize(const void* emi) { return amd::Elf::getElfSize(emi); } static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupported, @@ -307,6 +334,31 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp sramEccSupported = false; proc_name = "gfx1201"; break; + case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx9-generic"; + break; + case 
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx10-1-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx10-3-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx11-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx12-generic"; + break; default: return false; } @@ -320,7 +372,7 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string if (ehdr->e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) return false; bool isXnackSupported{false}, isSramEccSupported{false}; - + const char* vstr = nullptr; std::string proc_name; if (!getProcName(ehdr->e_flags, proc_name, isXnackSupported, isSramEccSupported)) return false; target_id = std::string(kAmdgcnTargetTriple) + '-' + proc_name; @@ -349,11 +401,14 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string } case ELFABIVERSION_AMDGPU_HSA_V4: - case ELFABIVERSION_AMDGPU_HSA_V5: { + case ELFABIVERSION_AMDGPU_HSA_V5: + case ELFABIVERSION_AMDGPU_HSA_V6: { if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) { - LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str()); - } else { - LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str()); + vstr = "V4"; + } else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V5) { + vstr = "V5"; + } else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V6) { + vstr = "V6"; } unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4; if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4) @@ -364,8 +419,10 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string unsigned co_xnack_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_XNACK_V4; if 
(co_xnack_value == EF_AMDGPU_FEATURE_XNACK_OFF_V4) target_id += ":xnack-"; + else if (co_xnack_value == EF_AMDGPU_FEATURE_XNACK_ON_V4) target_id += ":xnack+"; + LogPrintfInfo("[Code Object %s, target id: %s]", vstr, target_id.c_str()); break; } @@ -387,6 +444,45 @@ static bool consume(std::string& input, std::string consume_) { return true; } +// Is agent target compatible with generic code object target? +static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) { + // The map is subject to change per removing policy + static std::map genericTargetMap{ + // "gfx9-generic" + {"gfx900", "gfx9-generic"}, + {"gfx902", "gfx9-generic"}, + {"gfx904", "gfx9-generic"}, + {"gfx906", "gfx9-generic"}, + {"gfx909", "gfx9-generic"}, + {"gfx90c", "gfx9-generic"}, + // "gfx10-1-generic" + {"gfx1010", "gfx10-1-generic"}, + {"gfx1011", "gfx10-1-generic"}, + {"gfx1012", "gfx10-1-generic"}, + {"gfx1013", "gfx10-1-generic"}, + // "gfx10-3-generic" + {"gfx1030", "gfx10-3-generic"}, + {"gfx1031", "gfx10-3-generic"}, + {"gfx1032", "gfx10-3-generic"}, + {"gfx1033", "gfx10-3-generic"}, + {"gfx1034", "gfx10-3-generic"}, + {"gfx1035", "gfx10-3-generic"}, + {"gfx1036", "gfx10-3-generic"}, + // "gfx11-generic" + {"gfx1100", "gfx11-generic"}, + {"gfx1101", "gfx11-generic"}, + {"gfx1102", "gfx11-generic"}, + {"gfx1103", "gfx11-generic"}, + {"gfx1150", "gfx11-generic"}, + {"gfx1151", "gfx11-generic"}, + // "gfx12-generic" + {"gfx1200", "gfx12-generic"}, + {"gfx1201", "gfx12-generic"}, + }; + auto search = genericTargetMap.find(agentTarget); + return search != genericTargetMap.end() && coTarget == search->second; +} + // Trim String till character, will be used to get gpuname // example: input is gfx908:sram-ecc+ and trim char is : // input will become :sram-ecc+. 
@@ -434,12 +530,11 @@ static bool getTargetIDValue(std::string& input, std::string& processor, char& s } static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object, - std::string& co_triple_target_id) { + std::string& co_triple_target_id) { std::string offload_kind = trimName(bundled_co_entry_id, '-'); if (offload_kind != kOffloadKindHipv4 && offload_kind != kOffloadKindHip && offload_kind != kOffloadKindHcc) return false; - if (offload_kind != kOffloadKindHipv4) return getTripleTargetIDFromCodeObject(code_object, co_triple_target_id); @@ -450,7 +545,7 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_ } static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, - std::string agent_triple_target_id) { + std::string agent_triple_target_id, unsigned int genericVersion) { // Primitive Check if (co_triple_target_id == agent_triple_target_id) return true; @@ -481,7 +576,13 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, if (!agent_triple_target_id.empty()) return false; // Check for compatibility - if (agent_isa_processor != co_processor) return false; + if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { + // co_processor is generic target + if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor)) + return false; + } else if (agent_isa_processor != co_processor) { + return false; + } if (co_sram_ecc != ' ') { if (co_sram_ecc != isa_sram_ecc) return false; } @@ -496,7 +597,7 @@ static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, hipError_t CodeObject::ExtractCodeObjectFromFile( amd::Os::FileDesc fdesc, size_t fsize, const void** image, const std::vector& device_names, - std::vector>& code_objs) { + std::vector>& code_objs, size_t foffset) { if (!amd::Os::isValidFileDesc(fdesc)) { return hipErrorFileNotFound; } @@ -504,7 +605,7 @@ hipError_t CodeObject::ExtractCodeObjectFromFile( // Map the file to memory, with offset 
0. // file will be unmapped in ModuleUnload // const void* image = nullptr; - if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, 0, image)) { + if (!amd::Os::MemoryMapFileDesc(fdesc, fsize, foffset, image)) { return hipErrorInvalidValue; } @@ -528,8 +629,9 @@ hipError_t CodeObject::ExtractCodeObjectFromMemory( hipError_t CodeObject::extractCodeObjectFromFatBinary( const void* data, const std::vector& agent_triple_target_ids, std::vector>& code_objs) { - std::string magic((const char*)data, kOffloadBundleUncompressedMagicStrSize); - if (magic.compare(kOffloadBundleUncompressedMagicStr)) { + bool isCompressed = false; + if (!IsClangOffloadMagicBundle(data, isCompressed) || isCompressed) { + LogPrintfInfo("IsClangOffloadMagicBundle(%p) return false or isCompressed is true", data); return hipErrorInvalidKernelFile; } @@ -554,13 +656,23 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; std::string co_triple_target_id; + unsigned int genericVersion = getGenericVersion(image); if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue; + LogPrintfInfo("bundleEntryId=%s, co_triple_target_id=%s, genericVersion=%d\n", bundleEntryId.c_str(), + co_triple_target_id.c_str(), genericVersion); for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { - if (code_objs[dev].first) continue; - if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) { + if (code_objs[dev].first) { + // Specific target already matched, skipped. + // But for generic target, we will continue searching for matched specific target. 
+ if (!isGenericTarget(code_objs[dev].first)) { + continue; + } + } + if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev], + genericVersion)) { + if (code_objs[dev].first == nullptr) --num_code_objs; code_objs[dev] = std::make_pair(image, image_size); - --num_code_objs; } } } @@ -879,7 +991,6 @@ hipError_t CodeObject::extractCodeObjectFromFatBinaryUsingComgr( std::string co_triple_target_id; bool valid_co = getTripleTargetID(bundleEntryId, image, co_triple_target_id); - if (valid_co) { LogPrintfError(" %s - [Code object targetID is %s]", bundleEntryId.c_str(), co_triple_target_id.c_str()); diff --git a/projects/clr/hipamd/src/hip_code_object.hpp b/projects/clr/hipamd/src/hip_code_object.hpp index dd7e6d28d3..a5bdaf0445 100644 --- a/projects/clr/hipamd/src/hip_code_object.hpp +++ b/projects/clr/hipamd/src/hip_code_object.hpp @@ -52,7 +52,7 @@ class CodeObject { // return code_objs{binary_ptr, binary_size}, which could be used to determine foffset static hipError_t ExtractCodeObjectFromFile(amd::Os::FileDesc fdesc, size_t fsize, const void ** image, const std::vector& device_names, - std::vector>& code_objs); + std::vector>& code_objs, size_t foffset); // Given an ptr to memory, extracts to code object for corresponding devices, // returns code_objs{binary_ptr, binary_size} and uniform resource indicator @@ -65,6 +65,12 @@ class CodeObject { static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed); + static unsigned int getGenericVersion(const void* image); + + static bool isGenericTarget(const void* image); + + static bool containGenericTarget(const void *data); + // Return size of fat bin static size_t getFatbinSize(const void* data, const bool isCompressed = false); diff --git a/projects/clr/hipamd/src/hip_fatbin.cpp b/projects/clr/hipamd/src/hip_fatbin.cpp index b26377135e..c434403726 100644 --- a/projects/clr/hipamd/src/hip_fatbin.cpp +++ b/projects/clr/hipamd/src/hip_fatbin.cpp @@ -118,7 +118,8 @@ void 
ListAllDeviceWithNoCOFromBundle(const std::unordered_map& devices) { +hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector& devices, + bool &containGenericTarget) { amd_comgr_data_t data_object {0}; amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS; hipError_t hip_status = hipSuccess; @@ -187,6 +188,13 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vectorfsize_)) guarantee(false, "Cannot unmap the file"); image_ = nullptr; @@ -323,9 +331,10 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector& devices) { if (!HIP_USE_RUNTIME_UNBUNDLER) { - return ExtractFatBinaryUsingCOMGR(devices); + bool containGenericTarget = false; + hipError_t status = ExtractFatBinaryUsingCOMGR(devices, containGenericTarget); + if (!containGenericTarget) return status; } - hipError_t hip_error = hipSuccess; std::vector> code_objs; @@ -335,9 +344,12 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devi for (size_t dev_idx = 0; dev_idx < devices.size(); ++dev_idx) { device_names.push_back(devices[dev_idx]->devices()[0]->isa().isaName()); } - - // We are given file name, get the file desc and file size - if (fname_.size() > 0) { + if (image_ != nullptr) { + // We are given the image pointer directly, try to extract file desc & file Size + hip_error = CodeObject::ExtractCodeObjectFromMemory(image_, + device_names, code_objs, uri_); + } else if (fname_.size() > 0) { + // We are given file name, get the file desc and file size // Get File Handle & size of the file. 
if (!amd::Os::GetFileHandle(fname_.c_str(), &fdesc_, &fsize_)) { return hipErrorFileNotFound; @@ -348,12 +360,7 @@ hipError_t FatBinaryInfo::ExtractFatBinary(const std::vector& devi // Extract the code object from file hip_error = CodeObject::ExtractCodeObjectFromFile(fdesc_, fsize_, &image_, - device_names, code_objs); - - } else if (image_ != nullptr) { - // We are directly given image pointer directly, try to extract file desc & file Size - hip_error = CodeObject::ExtractCodeObjectFromMemory(image_, - device_names, code_objs, uri_); + device_names, code_objs, foffset_); } else { return hipErrorInvalidValue; } diff --git a/projects/clr/hipamd/src/hip_fatbin.hpp b/projects/clr/hipamd/src/hip_fatbin.hpp index 5c4ea29761..fc1e9b3d1b 100644 --- a/projects/clr/hipamd/src/hip_fatbin.hpp +++ b/projects/clr/hipamd/src/hip_fatbin.hpp @@ -64,7 +64,8 @@ public: ~FatBinaryInfo(); // Loads Fat binary from file or image, unbundles COs for devices. - hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices); + hipError_t ExtractFatBinaryUsingCOMGR(const std::vector& devices, + bool &containGenericTarget); /** * @brief Extract code object from fatbin using comgr unbundling action via calling diff --git a/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp b/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp index 9f902c071a..dd0c676de6 100644 --- a/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp +++ b/projects/clr/hipamd/src/hiprtc/hiprtcComgrHelper.cpp @@ -266,6 +266,30 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp xnackSupported = false; sramEccSupported = false; proc_name = "gfx1201"; + case EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx9-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: + xnackSupported = true; + sramEccSupported = false; + proc_name = "gfx10-1-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: + xnackSupported = 
false; + sramEccSupported = false; + proc_name = "gfx10-3-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx11-generic"; + break; + case EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: + xnackSupported = false; + sramEccSupported = false; + proc_name = "gfx12-generic"; break; default: return false; @@ -309,12 +333,16 @@ static bool getTripleTargetIDFromCodeObject(const void* code_object, std::string } case ELFABIVERSION_AMDGPU_HSA_V4: - case ELFABIVERSION_AMDGPU_HSA_V5: { + case ELFABIVERSION_AMDGPU_HSA_V5: + case ELFABIVERSION_AMDGPU_HSA_V6: { if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V4) { LogPrintfInfo("[Code Object V4, target id:%s]", target_id.c_str()); - } else { + } else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V5) { LogPrintfInfo("[Code Object V5, target id:%s]", target_id.c_str()); + } else if (ehdr->e_ident[EI_ABIVERSION] & ELFABIVERSION_AMDGPU_HSA_V6) { + LogPrintfInfo("[Code Object V6, target id:%s]", target_id.c_str()); } + unsigned co_sram_value = (ehdr->e_flags) & EF_AMDGPU_FEATURE_SRAMECC_V4; if (co_sram_value == EF_AMDGPU_FEATURE_SRAMECC_OFF_V4) target_id += ":sramecc-"; @@ -347,6 +375,42 @@ static bool consume(std::string& input, std::string consume_) { return true; } +// Is agent target compatible with generic code object target? 
+static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& agentTarget) { + // The map is subject to change per removing policy + static std::map genericTargetMap{ + // "gfx9-generic" + {"gfx900", "gfx9-generic"}, + {"gfx902", "gfx9-generic"}, + {"gfx904", "gfx9-generic"}, + {"gfx906", "gfx9-generic"}, + {"gfx909", "gfx9-generic"}, + {"gfx90c", "gfx9-generic"}, + // "gfx10-1-generic" + {"gfx1010", "gfx10-1-generic"}, + {"gfx1011", "gfx10-1-generic"}, + {"gfx1012", "gfx10-1-generic"}, + {"gfx1013", "gfx10-1-generic"}, + // "gfx10-3-generic" + {"gfx1030", "gfx10-3-generic"}, + {"gfx1031", "gfx10-3-generic"}, + {"gfx1032", "gfx10-3-generic"}, + {"gfx1033", "gfx10-3-generic"}, + {"gfx1034", "gfx10-3-generic"}, + {"gfx1035", "gfx10-3-generic"}, + {"gfx1036", "gfx10-3-generic"}, + // "gfx11-generic" + {"gfx1100", "gfx11-generic"}, + {"gfx1101", "gfx11-generic"}, + {"gfx1102", "gfx11-generic"}, + {"gfx1103", "gfx11-generic"}, + {"gfx1150", "gfx11-generic"}, + {"gfx1151", "gfx11-generic"}, + }; + auto search = genericTargetMap.find(agentTarget); + return search != genericTargetMap.end() && coTarget == search->second; +} + // Trim String till character, will be used to get gpuname // example: input is gfx908:sram-ecc+ and trim char is : // input will become sram-ecc+. 
@@ -382,7 +446,7 @@ static bool getTargetIDValue(std::string& input, std::string& processor, char& s } static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_object, - std::string& co_triple_target_id) { + std::string& co_triple_target_id) { std::string offload_kind = trimName(bundled_co_entry_id, '-'); if (offload_kind != OFFLOAD_KIND_HIPV4 && offload_kind != OFFLOAD_KIND_HIP && offload_kind != OFFLOAD_KIND_HCC) @@ -398,7 +462,7 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_ } bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, - std::string agent_triple_target_id) { + std::string agent_triple_target_id, unsigned& genericVersion) { // Primitive Check if (co_triple_target_id == agent_triple_target_id) return true; @@ -430,7 +494,14 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, if (!agent_triple_target_id.empty()) return false; // Check for compatibility - if (agent_isa_processor != co_processor) return false; + if (genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { + // co_processor is generic target + if (!isCompatibleWithGenericTarget(co_processor, agent_isa_processor)) + return false; + } else if (agent_isa_processor != co_processor) { + return false; + } + if (co_sram_ecc != ' ') { if (co_sram_ecc != isa_sram_ecc) return false; } @@ -441,6 +512,17 @@ bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, return true; } +static inline unsigned int getGenericVersion(const void* image) { + const Elf64_Ehdr* ehdr = reinterpret_cast(image); + return ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6 + ? 
((ehdr->e_flags & EF_AMDGPU_GENERIC_VERSION) >> EF_AMDGPU_GENERIC_VERSION_OFFSET) + : 0; +} + +static inline bool isGenericTarget(const void* image) { + return getGenericVersion(image) >= EF_AMDGPU_GENERIC_VERSION_MIN; +} + bool UnbundleBitCode(const std::vector& bundled_llvm_bitcode, const std::string& isa, size_t& co_offset, size_t& co_size) { std::string magic(bundled_llvm_bitcode.begin(), @@ -464,8 +546,10 @@ bool UnbundleBitCode(const std::vector& bundled_llvm_bitcode, const std::s const size_t image_size = desc->size; std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + // Need call getTripleTargetID(...). // Check if the device id and code object id are compatible - if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa)) { + unsigned genericVersion = getGenericVersion(image); + if (isCodeObjectCompatibleWithDevice(bundleEntryId, isa, genericVersion)) { co_offset = (reinterpret_cast(image) - reinterpret_cast(data)); co_size = image_size; break; diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp index 17c5ae66ef..1abe5b0e8c 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLOfflineCompilation.cpp @@ -137,6 +137,9 @@ void OCLOfflineCompilation::open(unsigned int test, char* units, char strVersion[128]; _wrapper->clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name), name, NULL); + if (strstr(name, "-generic") != NULL) { + continue; // Skip generic target because it needs code object version 6 + } error_ = _wrapper->clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(strVersion), strVersion, 0); CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed"); diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index cb96df25b0..eb7454ee94 100644 --- a/projects/clr/rocclr/device/device.cpp +++ 
b/projects/clr/rocclr/device/device.cpp @@ -200,6 +200,7 @@ std::pair Isa::supportedIsas() { {"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, @@ -212,6 +213,7 @@ std::pair Isa::supportedIsas() { {"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, @@ -219,6 +221,7 @@ std::pair Isa::supportedIsas() { {"gfx1034", "gfx1034", true, true, 10, 3, 4, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1035", "gfx1035", true, true, 10, 3, 5, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1036", "gfx1036", true, true, 10, 3, 6, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx10-3-generic", nullptr, true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1100", "gfx1100", true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1101", "gfx1101", true, true, 11, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1102", "gfx1102", true, true, 11, 0, 
2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, @@ -226,8 +229,10 @@ std::pair Isa::supportedIsas() { {"gfx1150", "gfx1150", true, true, 11, 5, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1151", "gfx1151", true, true, 11, 5, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1152", "gfx1152", true, true, 11, 5, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx11-generic", nullptr, true, true, 11, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1200", "gfx1200", true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1201", "gfx1201", true, true, 12, 0, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx12-generic", nullptr, true, true, 12, 0, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, }; return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); } @@ -242,13 +247,30 @@ std::string Isa::isaName() const { } bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) { - if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || - codeObjectIsa.versionMinor() != agentIsa.versionMinor() || - codeObjectIsa.versionStepping() != agentIsa.versionStepping()) - return false; - - assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() && - agentIsa.sramecc() != Feature::Any); + bool isGeneric = std::strstr(codeObjectIsa.targetId(), "generic") != nullptr; + if (isGeneric) { + if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || + codeObjectIsa.versionMinor() > agentIsa.versionMinor() || + (codeObjectIsa.versionMinor() == agentIsa.versionMinor() && + codeObjectIsa.versionStepping() > agentIsa.versionStepping())) + return false; + if (std::strstr(agentIsa.targetId(), "gfx906") != nullptr) { + // For the generic target of gfx906, codeObjectIsa.isSrameccSupported() == false while + // agentIsa.isSrameccSupported() = true + assert(agentIsa.sramecc() != Feature::Any); + } + else { + assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() && + agentIsa.sramecc() != Feature::Any); + } + } 
else { + if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || + codeObjectIsa.versionMinor() != agentIsa.versionMinor() || + codeObjectIsa.versionStepping() != agentIsa.versionStepping()) + return false; + assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() && + agentIsa.sramecc() != Feature::Any); + } if ((codeObjectIsa.sramecc() == Feature::Enabled || codeObjectIsa.sramecc() == Feature::Disabled) && codeObjectIsa.sramecc() != agentIsa.sramecc())