From ea28025939782969fc1d86a310e875fe2d7fbdcf Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 12 Jan 2022 16:31:12 -0500 Subject: [PATCH] SWDEV-318505 - Update HSAIL xnack path Report proper target id for xnack in HSAIL path. Runtime will use ISA table and report hsailName(). Fix offline compilation path for PAL. Change-Id: Ic0250bf6b9c193d867aec9800a319da1bf00c3ee [ROCm/clr commit: a543d4a8605f4082495080f1442e2997f6890474] --- projects/clr/rocclr/device/device.cpp | 21 +++++++-------- projects/clr/rocclr/device/pal/paldevice.cpp | 22 ++++++++------- projects/clr/rocclr/device/pal/palkernel.cpp | 7 ++--- projects/clr/rocclr/device/pal/palprogram.cpp | 27 +++---------------- projects/clr/rocclr/device/pal/palprogram.hpp | 2 -- 5 files changed, 27 insertions(+), 52 deletions(-) diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 935617dc0d..1968801a98 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -122,16 +122,15 @@ std::pair Isa::supportedIsas() { {"gfx702", "gfx702", true, false, true, 7, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaii (can execute Hawiipro code) {"gfx703", nullptr, false, false, true, 7, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Mullins {"gfx704", "Bonaire", false, false, true, 7, 0, 4, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx705", "Mullins", false, false, true, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari - {"gfx801", nullptr, true, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx801:xnack-", "Carrizo", true, true, true, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx801:xnack+", nullptr, true, true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx705", "Mullins", false, false, true, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari {"gfx801", nullptr, true, true, true, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx801:xnack-", nullptr, true, false, true, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx801:xnack+", "Carrizo", true, true, true, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx802", "Tonga", true, true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Iceland {"gfx803", "Fiji", true, true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Ellesmere/Polaris10, Baffin/Polaris11, Polaris12, Polaris22/VegaM {"gfx805", nullptr, true, true, true, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Tongapro {"gfx810", nullptr, true, true, true, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx810:xnack-", "Stoney", true, true, true, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx810:xnack+", nullptr, true, true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx810:xnack-", nullptr, true, false, true, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx810:xnack+", "Stoney", true, true, true, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx900", "gfx901", true, true, false, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Greenland {"gfx900:xnack-", "gfx900", true, true, false, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx900:xnack+", "gfx901", true, true, false, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, @@ -159,9 +158,9 @@ std::pair Isa::supportedIsas() { {"gfx908:sramecc-:xnack+", nullptr, true, false, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack-", nullptr, true, false, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx908:sramecc+:xnack+", nullptr, true, false, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx902", "gfx903", false, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) - {"gfx902:xnack-", "gfx902", false, true, false, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx902:xnack+", "gfx902", false, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx909", nullptr, false, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) + {"gfx909:xnack-", nullptr, false, true, false, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx909:xnack+", nullptr, false, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a", nullptr, true, false, false, 9, 0, 10, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc-", nullptr, true, false, false, 9, 0, 10, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc+", nullptr, true, false, false, 9, 0, 10, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, @@ -172,8 +171,8 @@ std::pair Isa::supportedIsas() { {"gfx90a:sramecc+:xnack-", nullptr, true, false, false, 9, 0, 10, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90a:sramecc+:xnack+", nullptr, true, false, false, 9, 0, 10, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx90c", nullptr, true, true, false, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Renoir - {"gfx90c:xnack-", "gfx90c", true, true, false, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, - {"gfx90c:xnack+", nullptr, true, true, false, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx90c:xnack-", nullptr, true, false, false, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx90c:xnack+", nullptr, true, false, false, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, {"gfx1010", "gfx1010", true, true, false, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1010:xnack-", "gfx1010", true, true, false, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, {"gfx1010:xnack+", nullptr, true, true, false, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 398bdaf530..56f1f0acf7 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -72,13 +72,6 @@ struct PalDevice { static constexpr PalDevice supportedPalDevices[] = { // GFX Version PAL GFX IP Level PAL Name PAL ASIC Revision - {7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Kalindi", Pal::AsicRevision::Kalindi}, - {7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spectre", Pal::AsicRevision::Spectre}, - {7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spooky", Pal::AsicRevision::Spooky}, - {7, 0, 1, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::HawaiiPro}, - {7, 0, 2, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::Hawaii}, - {7, 0, 4, Pal::GfxIpLevel::GfxIp7, "Bonaire", Pal::AsicRevision::Bonaire}, - {7, 0, 5, Pal::GfxIpLevel::GfxIp7, "Mullins", Pal::AsicRevision::Godavari}, // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support Mullins. {8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo}, {8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol}, {8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland}, @@ -94,7 +87,7 @@ static constexpr PalDevice supportedPalDevices[] = { {9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12}, {9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20}, {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven2}, - {9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir}, + {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Renoir}, {10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10}, {10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12}, {10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14}, @@ -117,7 +110,8 @@ static std::tuple findIsa(Pal::AsicRevision asicRe palDeviceIter->gfxipMajor_, palDeviceIter->gfxipMinor_, palDeviceIter->gfxipStepping_, sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled, xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled); - return std::make_tuple(isa, palDeviceIter->palName_); + return std::make_tuple( + isa, (palDeviceIter->gfxipMajor_ > 8) ? isa->hsailName() : palDeviceIter->palName_); } static std::tuple findPal(uint32_t gfxipMajor, @@ -170,7 +164,8 @@ bool NullDevice::init() { // device. This allows code objects to be compiled for all supported ISAs. std::vector devices = getDevices(CL_DEVICE_TYPE_GPU, false); for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) { - if (!isa->runtimePalSupported()) { + if (!isa->runtimePalSupported() || (isa->sramecc() == amd::Isa::Feature::Any) || + (isa->xnack() == amd::Isa::Feature::Any)) { continue; } bool isOnline = false; @@ -243,6 +238,13 @@ bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLeve LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId()); return false; } + if (!settings().useLightning_) { + if ((isa.hsailName() != nullptr)) { + palName_ = isa.hsailName(); + } else { + return false; + } + } if (!ValidateComgr()) { LogPrintfError("Code object manager initialization failed for offline PAL device %s", isa.targetId()); diff --git a/projects/clr/rocclr/device/pal/palkernel.cpp b/projects/clr/rocclr/device/pal/palkernel.cpp index ac9f28e690..8fe5b64fcc 100644 --- a/projects/clr/rocclr/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/device/pal/palkernel.cpp @@ -68,9 +68,6 @@ void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize, } bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc) { - if (prog().isNull()) { - return false; - } if (!sym) { return false; } @@ -134,8 +131,8 @@ bool HSAILKernel::init() { // Pull out metadata from the ELF size_t sizeOfArgList; acl_error error = amd::Hsail::QueryInfo(palNullDevice().compiler(), prog().binaryElf(), - RT_ARGUMENT_ARRAY, openClKernelName.c_str(), - nullptr, &sizeOfArgList); + RT_ARGUMENT_ARRAY, openClKernelName.c_str(), + nullptr, &sizeOfArgList); if (error != ACL_SUCCESS) { return false; } diff --git a/projects/clr/rocclr/device/pal/palprogram.cpp b/projects/clr/rocclr/device/pal/palprogram.cpp index c2d2b8f5d5..76f89a0389 100644 --- a/projects/clr/rocclr/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/device/pal/palprogram.cpp @@ -200,8 +200,7 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) loaderContext_(this) { assert(!device.isOnline()); isNull_ = true; - // Cannot load onto a NullDevice. - loader_ = nullptr; + loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } HSAILProgram::~HSAILProgram() { @@ -246,12 +245,6 @@ inline static std::vector splitSpaceSeparatedString(char* str) { bool HSAILProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, bool internalKernel) { #if defined(WITH_COMPILER_LIB) - // Stop compilation if it is an offline device - PAL runtime does not - // support ISA compiled offline - if (!device().isOnline()) { - return true; - } - // ACL_TYPE_CG stage is not performed for offline compilation executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr); if (executable_ == nullptr) { @@ -539,10 +532,6 @@ bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) // could not find it, or the PAL runtime does not support it. return false; } - if (program_->isNull()) { - // Cannot load code onto offline devices. - return false; - } return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa()); } @@ -742,13 +731,7 @@ bool LightningProgram::createBinary(amd::option::Options* options) { bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUniformWorkGroupSize, bool internalKernel) { #if defined(USE_COMGR_LIBRARY) - // Stop compilation if it is an offline device - PAL runtime does not - // support ISA compiled offline - if (!device().isOnline()) { - return true; - } - - // Find the size of global variables from the binary + // Find the size of global variables from the binary if (!FindGlobalVarSize(binary, binSize)) { buildLog_ += "Error: Cannot Find Global Var Sizes\n"; return false; @@ -798,12 +781,8 @@ bool LightningProgram::createKernels(void* binary, size_t binSize, bool useUnifo bool LightningProgram::setKernels(void* binary, size_t binSize, amd::Os::FileDesc fdesc, size_t foffset, std::string uri) { #if defined(USE_COMGR_LIBRARY) - if (!device().isOnline()) { - return true; - } - // Collect the information about compiled binary - if (palDevice().rgpCaptureMgr() != nullptr) { + if (!isNull() && (palDevice().rgpCaptureMgr() != nullptr)) { apiHash_ = palDevice().rgpCaptureMgr()->AddElfBinary(binary, binSize, binary, binSize, codeSegGpu_->iMem(), codeSegGpu_->offset()); } diff --git a/projects/clr/rocclr/device/pal/palprogram.hpp b/projects/clr/rocclr/device/pal/palprogram.hpp index e5b4942cb6..afbe857052 100644 --- a/projects/clr/rocclr/device/pal/palprogram.hpp +++ b/projects/clr/rocclr/device/pal/palprogram.hpp @@ -164,7 +164,6 @@ class HSAILProgram : public device::Program { //! Return a typecasted PAL device. The device must not be the NullDevice. pal::Device& palDevice() { - assert(!isNull()); return const_cast(static_cast(device())); } @@ -191,7 +190,6 @@ class HSAILProgram : public device::Program { //! Returns CPU address for a kernel uint64_t findHostKernelAddress(uint64_t devAddr) const { - assert(!isNull()); return loader_->FindHostAddress(devAddr); }