diff --git a/projects/clr/rocclr/runtime/device/pal/palblit.cpp b/projects/clr/rocclr/runtime/device/pal/palblit.cpp index 2c817144cf..e87865ed7d 100644 --- a/projects/clr/rocclr/runtime/device/pal/palblit.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palblit.cpp @@ -1746,9 +1746,10 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory // Fall into the PAL path for rejected transfers if (setup_.disableCopyBufferRect_ || gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) { - result = + if (!dev().settings().disableSdma_) { + result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire); - + } if (result) { synchronize(); return result; diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index f4e502b4fc..101e90de41 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -934,11 +934,8 @@ bool Device::create(Pal::IDevice* device) { palSettings->cmdBufBatchedSubmitChainLimit = 0; palSettings->disableResourceProcessingManager = true; palSettings->numScratchWavesPerCu = settings().numScratchWavesPerCu_; - - if (PAL_DISABLE_SDMA) { - // Make sure CP DMA is used only, since compute path won't work in PAL with OCL - palSettings->cpDmaCmdCopyMemoryMaxBytes = 0xFFFFFFFF; - } + // Make sure CP DMA can be used for all possible transfers + palSettings->cpDmaCmdCopyMemoryMaxBytes = 0xFFFFFFFF; // Commit the new settings for the device result = iDev()->CommitSettingsAndInit(); diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp index ffa4feec8b..bed9eafa37 100644 --- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp @@ -145,6 +145,7 @@ Settings::Settings() { lcWavefrontSize64_ = true; enableHwP2P_ = false; imageBufferWar_ = false; + disableSdma_ = PAL_DISABLE_SDMA; } bool Settings::create(const Pal::DeviceProperties& palProp, @@ -338,6 +339,9 @@ bool Settings::create(const Pal::DeviceProperties& palProp, imageBufferWar_ = GPU_IMAGE_BUFFER_WAR; } + // Image DMA must be disabled if SDMA is disabled + imageDMA_ &= !disableSdma_; + splitSizeForWin7_ = false; #if defined(_WIN32) diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp index fc6f63d416..65eb6f7927 100644 --- a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp @@ -63,7 +63,8 @@ class Settings : public device::Settings { uint splitSizeForWin7_ : 1; //!< DMA flush split size for Win 7 uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10 - uint reserved_ : 9; + uint disableSdma_ : 1; //!< Disable SDMA support + uint reserved_ : 8; }; uint value_; }; diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 187a0660ad..4e2408b4e5 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -824,7 +824,7 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs, } // Check if device has SDMA engines - if (dev().numDMAEngines() != 0 && !PAL_DISABLE_SDMA) { + if (dev().numDMAEngines() != 0 && !dev().settings().disableSdma_) { uint sdma; // If only 1 SDMA engine is available then use that one, otherwise it's a round-robin manner if ((dev().numDMAEngines() < 2) || ((idx + 1) & 0x1)) {