From fb76b9620c8a7611b1c6f50236667bee19f12ad0 Mon Sep 17 00:00:00 2001 From: Tao Sang Date: Fri, 29 Nov 2024 18:16:06 -0500 Subject: [PATCH] SWDEV-496667 - Support gfx9-4-generic target Support gfx9-4-generic target to cover MI3XX. Support the sramecc and xnack features in generic targets. Improve some code formatting. Add more logging around program loading. Change-Id: I6b3c6af55c60cffd43ce6f17b75998f751b75713 [ROCm/clr commit: 3ad8f1b811d718b287f0a15f449f765053c0349e] --- projects/clr/hipamd/src/amd_hsa_elf.hpp | 3 ++- projects/clr/hipamd/src/hip_code_object.cpp | 24 +++++++++++++++------ projects/clr/hipamd/src/hip_code_object.hpp | 2 +- projects/clr/rocclr/device/device.cpp | 23 +++++++++++++++++--- projects/clr/rocclr/platform/program.cpp | 5 +++++ 5 files changed, 45 insertions(+), 12 deletions(-) diff --git a/projects/clr/hipamd/src/amd_hsa_elf.hpp b/projects/clr/hipamd/src/amd_hsa_elf.hpp index f150d21d60..719408f184 100644 --- a/projects/clr/hipamd/src/amd_hsa_elf.hpp +++ b/projects/clr/hipamd/src/amd_hsa_elf.hpp @@ -121,10 +121,11 @@ enum : unsigned { EF_AMDGPU_MACH_AMDGCN_RESERVED_0X57 = 0x057, EF_AMDGPU_MACH_AMDGCN_RESERVED_0X58 = 0x058, EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC = 0x059, + EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC = 0x05f, // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. 
diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp index 557967117a..047ce5b301 100644 --- a/projects/clr/hipamd/src/hip_code_object.cpp +++ b/projects/clr/hipamd/src/hip_code_object.cpp @@ -97,7 +97,7 @@ bool CodeObject::IsClangOffloadMagicBundle(const void* data, bool& isCompressed) return false; } -unsigned int CodeObject::getGenericVersion(const void* image) { +uint32_t CodeObject::getGenericVersion(const void* image) { const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(image); return (ehdr->e_machine == EM_AMDGPU && ehdr->e_ident[EI_OSABI] == ELFOSABI_AMDGPU_HSA && ehdr->e_ident[EI_ABIVERSION] == ELFABIVERSION_AMDGPU_HSA_V6) ? @@ -339,6 +339,11 @@ static bool getProcName(uint32_t EFlags, std::string& proc_name, bool& xnackSupp sramEccSupported = false; proc_name = "gfx9-generic"; break; + case EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: + xnackSupported = true; + sramEccSupported = true; + proc_name = "gfx9-4-generic"; + break; case EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: xnackSupported = true; sramEccSupported = false; @@ -455,6 +460,11 @@ static bool isCompatibleWithGenericTarget(std::string& coTarget, std::string& ag {"gfx906", "gfx9-generic"}, {"gfx909", "gfx9-generic"}, {"gfx90c", "gfx9-generic"}, + // "gfx9-4-generic" + {"gfx940", "gfx9-4-generic"}, + {"gfx941", "gfx9-4-generic"}, + {"gfx942", "gfx9-4-generic"}, + {"gfx950", "gfx9-4-generic"}, // "gfx10-1-generic" {"gfx1010", "gfx10-1-generic"}, {"gfx1011", "gfx10-1-generic"}, @@ -656,17 +666,17 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; std::string co_triple_target_id; - unsigned int genericVersion = getGenericVersion(image); + uint32_t genericVersion = getGenericVersion(image); if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue; - LogPrintfInfo("bundleEntryId=%s, co_triple_target_id=%s, genericVersion=%d\n", bundleEntryId.c_str(), - 
co_triple_target_id.c_str(), genericVersion); + LogPrintfInfo("bundleEntryId=%s, co_triple_target_id=%s, genericVersion=%u\n", + bundleEntryId.c_str(), co_triple_target_id.c_str(), genericVersion); for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { if (code_objs[dev].first) { - // Specific target already matched, skipped. - // But for generic target, we will continue searching for matched specific target. if (!isGenericTarget(code_objs[dev].first)) { - continue; + continue; // Specific target already found + } else if(genericVersion >= EF_AMDGPU_GENERIC_VERSION_MIN) { + continue; // Generic target already found, no need to check another generic } } if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev], diff --git a/projects/clr/hipamd/src/hip_code_object.hpp b/projects/clr/hipamd/src/hip_code_object.hpp index a5bdaf0445..428514a34c 100644 --- a/projects/clr/hipamd/src/hip_code_object.hpp +++ b/projects/clr/hipamd/src/hip_code_object.hpp @@ -65,7 +65,7 @@ class CodeObject { static bool IsClangOffloadMagicBundle(const void* data, bool& isCompressed); - static unsigned int getGenericVersion(const void* image); + static uint32_t getGenericVersion(const void* image); static bool isGenericTarget(const void* image); diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index eb7454ee94..fb45f77dee 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -200,7 +200,18 @@ std::pair Isa::supportedIsas() { {"gfx90c", nullptr, true, true, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c:xnack-", "gfx90c", true, true, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c:xnack+", "gfx90d", true, true, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-generic", nullptr, true, true, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + 
{"gfx9-generic:xnack-", nullptr, true, true, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-generic:xnack+", nullptr, true, true, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic", nullptr, true, true, 9, 4, 0, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:sramecc-",nullptr, true, true, 9, 4, 0, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:sramecc+",nullptr, true, true, 9, 4, 0, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:xnack-", nullptr, true, true, 9, 4, 0, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:xnack+", nullptr, true, true, 9, 4, 0, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:sramecc-:xnack-",nullptr,true,true, 9, 4, 0, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:sramecc-:xnack+",nullptr,true,true, 9, 4, 0, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:sramecc+:xnack-",nullptr,true,true, 9, 4, 0, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx9-4-generic:sramecc+:xnack+",nullptr,true,true, 9, 4, 0, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx1010", "gfx1010", true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1010:xnack-", "gfx1010", true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1010:xnack+", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, @@ -213,7 +224,9 @@ std::pair Isa::supportedIsas() { {"gfx1013", "gfx1013", true, false, 10, 1, 3, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1013:xnack-", "gfx1013", true, false, 10, 1, 3, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1013:xnack+", nullptr, true, false, 10, 1, 3, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, - {"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx10-1-generic", nullptr, true, true, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx10-1-generic:xnack-", nullptr, true, true, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx10-1-generic:xnack+", nullptr, true, true, 
10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1030", "gfx1030", true, true, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1031", "gfx1031", true, true, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1032", "gfx1032", true, true, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, @@ -252,8 +265,11 @@ bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) { if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || codeObjectIsa.versionMinor() > agentIsa.versionMinor() || (codeObjectIsa.versionMinor() == agentIsa.versionMinor() && - codeObjectIsa.versionStepping() > agentIsa.versionStepping())) + codeObjectIsa.versionStepping() > agentIsa.versionStepping())) { return false; + } +#ifdef DEBUG + // Only check in DEBUG mode if (std::strstr(agentIsa.targetId(), "gfx906") != nullptr) { // For the generic target of gfx906, codeObjectIsa.isSrameccSupported() == false while // agentIsa.isSrameccSupported() = true @@ -263,6 +279,7 @@ bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) { assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() && agentIsa.sramecc() != Feature::Any); } +#endif } else { if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || codeObjectIsa.versionMinor() != agentIsa.versionMinor() || diff --git a/projects/clr/rocclr/platform/program.cpp b/projects/clr/rocclr/platform/program.cpp index 9c7f354522..d8d0f50841 100644 --- a/projects/clr/rocclr/platform/program.cpp +++ b/projects/clr/rocclr/platform/program.cpp @@ -612,11 +612,16 @@ bool Program::load(const std::vector& devices) { } if (!devProgram.load()) { + if (!devProgram.buildLog().empty()) { + LogPrintfError("devProgram.load() failed with buildLog=%s\n", + devProgram.buildLog().c_str()); + } return false; } // Run kernels marked with init if (!devProgram.runInitKernels()) { + LogError("runInitKernels() failed\n"); return false; } }