From 52a1f5dbf737ef3bf482325d37e511e0543376fe Mon Sep 17 00:00:00 2001 From: agunashe Date: Tue, 7 Jan 2025 11:04:44 -0800 Subject: [PATCH] SWDEV-507967 - Deprecate gfx9, gfx8, gfx7 on Windows PAL_CLIENT_INTERFACE_MAJOR_VERSION from 872 --> 910 Change-Id: I03dfa2924ccdae4c2f13f09d5f34ee58298e1343 [ROCm/clr commit: ea804e16f85a7a853f9be60dbfebffb322157d72] --- projects/clr/rocclr/cmake/ROCclrPAL.cmake | 2 +- projects/clr/rocclr/device/device.hpp | 4 +- projects/clr/rocclr/device/devprogram.cpp | 8 +-- projects/clr/rocclr/device/pal/palblit.cpp | 12 ++-- .../clr/rocclr/device/pal/palcounters.cpp | 21 ------ projects/clr/rocclr/device/pal/paldevice.cpp | 22 +------ .../clr/rocclr/device/pal/palresource.cpp | 11 +--- .../clr/rocclr/device/pal/palsettings.cpp | 64 +------------------ .../clr/rocclr/device/pal/palsettings.hpp | 6 +- .../clr/rocclr/device/pal/paltimestamp.cpp | 4 +- 10 files changed, 18 insertions(+), 136 deletions(-) diff --git a/projects/clr/rocclr/cmake/ROCclrPAL.cmake b/projects/clr/rocclr/cmake/ROCclrPAL.cmake index 4d128ee3b8..91122433cd 100644 --- a/projects/clr/rocclr/cmake/ROCclrPAL.cmake +++ b/projects/clr/rocclr/cmake/ROCclrPAL.cmake @@ -20,7 +20,7 @@ set(PAL_CLIENT "OCL") -set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 872) +set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 910) set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION 42) set(GPUOPEN_CLIENT_INTERFACE_MINOR_VERSION 0) set(AMD_DK_ROOT $ENV{DK_ROOT}) diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index d12f896d9b..09db38f4fd 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -673,8 +673,6 @@ class Settings : public amd::HeapObject { uint customHostAllocator_ : 1; //!< True if device has custom host allocator // that replaces generic OS allocation routines uint supportDepthsRGB_ : 1; //!< Support DEPTH and sRGB channel order format - uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program - uint reportFMA_ : 1; 
//!< Report FP_FAST_FMA define in CL program uint singleFpDenorm_ : 1; //!< Support Single FP Denorm uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device uint useLightning_ : 1; //!< Enable LC path for this device @@ -689,7 +687,7 @@ class Settings : public amd::HeapObject { uint gwsInitSupported_:1; //!< Check if GWS is supported on this machine. uint kernel_arg_opt_: 1; //!< Enables kernel arg optimization for blit kernels uint kernel_arg_impl_ : 2; //!< Kernel argument implementation - uint reserved_ : 7; + uint reserved_ : 12; }; uint value_; }; diff --git a/projects/clr/rocclr/device/devprogram.cpp b/projects/clr/rocclr/device/devprogram.cpp index b3b8f844da..1b82993f1e 100644 --- a/projects/clr/rocclr/device/devprogram.cpp +++ b/projects/clr/rocclr/device/devprogram.cpp @@ -1926,12 +1926,8 @@ std::vector Program::ProcessOptions(amd::option::Options* options) // Set options for the standard device specific options // All our devices support these options now - if (device().settings().reportFMAF_) { - optionsVec.push_back("-DFP_FAST_FMAF=1"); - } - if (device().settings().reportFMA_) { - optionsVec.push_back("-DFP_FAST_FMA=1"); - } + optionsVec.push_back("-DFP_FAST_FMAF=1"); + optionsVec.push_back("-DFP_FAST_FMA=1"); } else { if (!isHIP()) { diff --git a/projects/clr/rocclr/device/pal/palblit.cpp b/projects/clr/rocclr/device/pal/palblit.cpp index c39cf0db08..bd3d39c80f 100644 --- a/projects/clr/rocclr/device/pal/palblit.cpp +++ b/projects/clr/rocclr/device/pal/palblit.cpp @@ -1066,7 +1066,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, bool result = false; amd::Image::Format newFormat(gpuMem(dstMemory).desc().format_); bool swapLayer = - (dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_; + dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY; // Find unsupported formats for (uint i = 0; i < RejectedFormatDataTotal; ++i) { @@ -1388,7 +1388,7 @@ bool 
KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, bool result = false; amd::Image::Format newFormat(gpuMem(srcMemory).desc().format_); bool swapLayer = - (srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_; + srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY; // Find unsupported formats for (uint i = 0; i < RejectedFormatDataTotal; ++i) { @@ -1655,16 +1655,14 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst // Program source origin int32_t srcOrg[4] = {(int32_t)srcOrigin[0], (int32_t)srcOrigin[1], (int32_t)srcOrigin[2], 0}; - if ((gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && - dev().settings().gfx10Plus_) { + if (gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) { srcOrg[3] = 1; } setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg); // Program destinaiton origin int32_t dstOrg[4] = {(int32_t)dstOrigin[0], (int32_t)dstOrigin[1], (int32_t)dstOrigin[2], 0}; - if ((gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && - dev().settings().gfx10Plus_) { + if (gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) { dstOrg[3] = 1; } setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg); @@ -2329,7 +2327,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern, Memory* memView = &gpuMem(memory); amd::Image::Format newFormat(gpuMem(memory).owner()->asImage()->getImageFormat()); bool swapLayer = - (memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_; + memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY; // Program the kernels workload depending on the fill dimensions fillType = FillImage; diff --git a/projects/clr/rocclr/device/pal/palcounters.cpp b/projects/clr/rocclr/device/pal/palcounters.cpp index c574d563bb..178d104189 100644 --- a/projects/clr/rocclr/device/pal/palcounters.cpp +++ 
b/projects/clr/rocclr/device/pal/palcounters.cpp @@ -667,27 +667,6 @@ static constexpr std::array, 140> gfx10BlockIdPal = {{ void PerfCounter::convertInfo() { switch (dev().ipLevel()) { - case Pal::GfxIpLevel::GfxIp7: - if (info_.blockIndex_ < ciBlockIdOrcaToPal.size()) { - auto p = ciBlockIdOrcaToPal[info_.blockIndex_]; - info_.blockIndex_ = std::get<0>(p); - info_.counterIndex_ = std::get<1>(p); - } - break; - case Pal::GfxIpLevel::GfxIp8: - if (info_.blockIndex_ < viBlockIdOrcaToPal.size()) { - auto p = viBlockIdOrcaToPal[info_.blockIndex_]; - info_.blockIndex_ = std::get<0>(p); - info_.counterIndex_ = std::get<1>(p); - } - break; - case Pal::GfxIpLevel::GfxIp9: - if (info_.blockIndex_ < gfx9BlockIdPal.size()) { - auto p = gfx9BlockIdPal[info_.blockIndex_]; - info_.blockIndex_ = std::get<0>(p); - info_.counterIndex_ = std::get<1>(p); - } - break; case Pal::GfxIpLevel::GfxIp10_1: case Pal::GfxIpLevel::GfxIp10_3: case Pal::GfxIpLevel::GfxIp11_0: diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index dd50983923..ecc64a215b 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -84,21 +84,6 @@ struct PalDevice { static constexpr PalDevice supportedPalDevices[] = { // GFX Version PAL GFX IP Level PAL Name PAL ASIC Revision - {8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo}, - {8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol}, - {8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland}, - {8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Tonga", Pal::AsicRevision::Tonga}, // Also Tongapro (generated code is for Tonga) - {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Fiji", Pal::AsicRevision::Fiji}, - {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Ellesmere", Pal::AsicRevision::Polaris10}, // Ellesmere - {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Baffin", Pal::AsicRevision::Polaris11}, // Baffin - {8, 0, 3, 
Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris12}, // Lexa - {8, 1, 0, Pal::GfxIpLevel::GfxIp8_1, "Stoney", Pal::AsicRevision::Stoney}, - {9, 0, 0, Pal::GfxIpLevel::GfxIp9, "gfx900", Pal::AsicRevision::Vega10}, - {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven}, - {9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12}, - {9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20}, - {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven2}, - {9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir}, {10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10}, {10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12}, {10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14}, @@ -391,10 +376,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, if (settings().checkExtension(ClKhrFp64)) { info_.doubleFPConfig_ = info_.singleFPConfig_ | CL_FP_DENORM; } - - if (settings().reportFMA_) { - info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; - } + info_.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; if (settings().checkExtension(ClKhrFp16)) { info_.halfFPConfig_ = info_.singleFPConfig_; @@ -587,7 +569,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, if (settings().svmFineGrainSystem_) { info_.svmCapabilities_ |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM; } - if (amd::IS_HIP && ipLevel_ >= Pal::GfxIpLevel::GfxIp9) { + if (amd::IS_HIP) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } diff --git a/projects/clr/rocclr/device/pal/palresource.cpp b/projects/clr/rocclr/device/pal/palresource.cpp index c6545cbc2b..d62dac6ab6 100644 --- a/projects/clr/rocclr/device/pal/palresource.cpp +++ b/projects/clr/rocclr/device/pal/palresource.cpp @@ -1043,12 +1043,7 @@ bool Resource::CreateInterop(CreateParams* params) { //! 
and OGL decompresses 24bit DEPTH into D24S8 for OGL compatibility if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) && (desc().format_.image_channel_data_type == CL_UNORM_INT24)) { - if (dev().settings().gfx10Plus_) { - hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000; - } else { - hwState_[1] &= ~0x3c000000; - hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000; - } + hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000; } hwState_[8] = GetHSAILImageFormatType(desc().format_); hwState_[9] = GetHSAILImageOrderType(desc().format_); @@ -1253,7 +1248,7 @@ bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear // Force remote allocation if it was requested in the settings if (dev().settings().remoteAlloc_ && ((memoryType() == Local) || (memoryType() == Persistent))) { - if (dev().settings().apuSystem_ && dev().settings().viPlus_) { + if (dev().settings().apuSystem_) { desc_.type_ = Remote; } else { desc_.type_ = RemoteUSWC; @@ -1512,7 +1507,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin, // Make sure linear pitch in bytes is 4 bytes aligned if (((gpuMemoryRowPitch % 4) != 0) || // another DRM restriciton... SI has 4 pixels - (gpuMemoryOffset % 4 != 0) || (dev().settings().sdamPageFaultWar_ && (imageOffsetx != 0))) { + (gpuMemoryOffset % 4 != 0) || (imageOffsetx != 0)) { return false; } } diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index 758a0416b6..c4b2cf5092 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -67,10 +67,6 @@ Settings::Settings() { pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE) ? 
defaultMinXferSize * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi; - // Disable FP_FAST_FMA defines by default - reportFMAF_ = false; - reportFMA_ = false; - // GPU device by default apuSystem_ = false; @@ -93,11 +89,6 @@ Settings::Settings() { // Use image DMA if requested imageDMA_ = GPU_IMAGE_DMA; - // Disable ASIC specific features by default - viPlus_ = false; - aiPlus_ = false; - gfx10Plus_ = false; - // Number of compute rings. numComputeRings_ = 0; @@ -116,9 +107,6 @@ Settings::Settings() { // Don't support Denormals for single precision by default singleFpDenorm_ = false; - // Disable SDMA workaround by default - sdamPageFaultWar_ = false; - // SQTT buffer size in bytes rgpSqttDispCount_ = PAL_RGP_DISP_COUNT; rgpSqttWaitIdle_ = true; @@ -201,8 +189,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, case Pal::AsicRevision::Navi14: case Pal::AsicRevision::Navi12: case Pal::AsicRevision::Navi10: - case Pal::AsicRevision::Navi10_A0: - gfx10Plus_ = true; useLightning_ = GPU_ENABLE_LC; enableWgpMode_ = GPU_ENABLE_WGP_MODE; if (useLightning_) { @@ -219,51 +205,14 @@ bool Settings::create(const Pal::DeviceProperties& palProp, // GFX10.1 HW doesn't support custom pitch. Enable double copy workaround imageBufferWar_ = GPU_IMAGE_BUFFER_WAR; } - // Fall through to AI (gfx9) ... - case Pal::AsicRevision::Vega20: - // Enable HW P2P path for Vega20+. Runtime still relies on KMD/PAL for support enableHwP2P_ = true; - case Pal::AsicRevision::Vega12: - case Pal::AsicRevision::Vega10: - case Pal::AsicRevision::Raven: - case Pal::AsicRevision::Raven2: - case Pal::AsicRevision::Renoir: - aiPlus_ = true; enableCoopGroups_ = IS_LINUX; enableCoopMultiDeviceGroups_ = IS_LINUX; if (useLightning_) { singleFpDenorm_ = true; } - // Fall through to VI ... 
- case Pal::AsicRevision::Carrizo: - case Pal::AsicRevision::Bristol: - case Pal::AsicRevision::Stoney: - case Pal::AsicRevision::Iceland: - case Pal::AsicRevision::Tonga: - case Pal::AsicRevision::Fiji: - case Pal::AsicRevision::Polaris10: - case Pal::AsicRevision::Polaris11: - case Pal::AsicRevision::Polaris12: - // Keep this false even though we have support - // singleFpDenorm_ = true; - viPlus_ = true; - // SDMA may have memory access outside of - // the valid buffer range and cause a page fault - sdamPageFaultWar_ = true; enableExtension(ClKhrFp16); - // Fall through to CI ... - case Pal::AsicRevision::Kalindi: - case Pal::AsicRevision::Godavari: - case Pal::AsicRevision::Spectre: - case Pal::AsicRevision::Spooky: - case Pal::AsicRevision::Bonaire: - case Pal::AsicRevision::Hawaii: - case Pal::AsicRevision::HawaiiPro: threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE; - reportFMAF_ = false; - if ((palProp.revision == Pal::AsicRevision::Hawaii) || aiPlus_) { - reportFMAF_ = true; - } // Cache line size is 64 bytes cacheLineSize_ = 64; // L1 cache size is 16KB @@ -293,13 +242,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, } else { maxAllocSize_ = 3ULL * Gi; } - - // Note: More than 4 command buffers may cause a HW hang - // with HWSC on pre-gfx9 devices in OCLPerfKernelArguments - if (!aiPlus_) { - maxCmdBuffers_ = 4; - } - supportRA_ = false; numMemDependencies_ = GPU_NUM_MEM_DEPENDENCY; break; @@ -345,7 +287,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp, if (hwLDSSize_ == 0) { // Use hardcoded values for now, since PAL properties aren't available with offline devices - hwLDSSize_ = (IS_LINUX || amd::IS_HIP || gfx10Plus_) ? 64 * Ki: 32 * Ki; + hwLDSSize_ = 64 * Ki;
} imageSupport_ = true; @@ -357,10 +299,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, // HW doesn't support untiled image writes // hostMemDirectAccess_ |= HostMemImage; - // Report FP_FAST_FMA define if double precision HW - reportFMA_ = true; - reportFMAF_ = true; - if (doublePrecision_) { // Enable KHR double precision extension enableExtension(ClKhrFp64); diff --git a/projects/clr/rocclr/device/pal/palsettings.hpp b/projects/clr/rocclr/device/pal/palsettings.hpp index cb270ad5c6..f981c748aa 100644 --- a/projects/clr/rocclr/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/device/pal/palsettings.hpp @@ -65,21 +65,17 @@ class Settings : public device::Settings { uint use64BitPtr_ : 1; //!< Use 64bit pointers on GPU uint force32BitOcl20_ : 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU uint imageDMA_ : 1; //!< Enable direct image DMA transfers - uint viPlus_ : 1; //!< VI and post VI features - uint aiPlus_ : 1; //!< AI and post AI features - uint gfx10Plus_ : 1; //!< gfx10 and post gfx10 features uint threadTraceEnable_ : 1; //!< Thread trace enable uint svmAtomics_ : 1; //!< SVM device atomics uint svmFineGrainSystem_ : 1; //!< SVM fine grain system support uint useDeviceQueue_ : 1; //!< Submit to separate device queue - uint sdamPageFaultWar_ : 1; //!< SDMA page fault workaround uint rgpSqttWaitIdle_ : 1; //!< Wait for idle after SQTT trace uint rgpSqttForceDisable_ : 1; //!< Disables SQTT uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10 uint disableSdma_ : 1; //!< Disable SDMA support uint alwaysResident_ : 1; //!< Make resources resident at allocation time - uint reserved_ : 10; + uint reserved_ : 13; }; uint value_; }; diff --git a/projects/clr/rocclr/device/pal/paltimestamp.cpp b/projects/clr/rocclr/device/pal/paltimestamp.cpp index 286822caec..6d0584f131 100644 --- a/projects/clr/rocclr/device/pal/paltimestamp.cpp +++ 
b/projects/clr/rocclr/device/pal/paltimestamp.cpp @@ -36,7 +36,7 @@ TimeStamp::~TimeStamp() {} void TimeStamp::begin() { if (!flags_.beginIssued_) { - gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_, + gpu().iCmd()->CmdWriteTimestamp(Pal::PipelineStageFlag::PipelineStageBottomOfPipe, *iMem_, memOffset_ + CommandStartTime * sizeof(uint64_t)); flags_.beginIssued_ = true; } @@ -44,7 +44,7 @@ void TimeStamp::begin() { void TimeStamp::end() { CondLog(!flags_.beginIssued_, "We didn't issue a begin operation!"); - gpu().iCmd()->CmdWriteTimestamp(Pal::HwPipePoint::HwPipeBottom, *iMem_, + gpu().iCmd()->CmdWriteTimestamp(Pal::PipelineStageFlag::PipelineStageBottomOfPipe, *iMem_, memOffset_ + CommandEndTime * sizeof(uint64_t)); flags_.endIssued_ = true; }