From c1b95b09bf40744347f74708cf18d2a4c79d82da Mon Sep 17 00:00:00 2001
From: Satyanvesh Dittakavi
Date: Tue, 15 Feb 2022 12:55:04 +0000
Subject: [PATCH] SWDEV-326397 - P2P copies to take SDMA path if there is no pending dispatch

Change-Id: I50cfb8d77f7882151a20a1de7aaf5219b1695b7d
---
 rocclr/device/rocm/rocblit.cpp    | 2 +-
 rocclr/device/rocm/rocvirtual.hpp | 1 +
 rocclr/utils/flags.hpp            | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp
index 7183b02d2f..aa7e941908 100644
--- a/rocclr/device/rocm/rocblit.cpp
+++ b/rocclr/device/rocm/rocblit.cpp
@@ -2051,7 +2051,7 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
   amd::ScopedLock k(lockXferOps_);
   bool result = false;
   bool p2p = (&gpuMem(srcMemory).dev() != &gpuMem(dstMemory).dev()) &&
-             (sizeIn[0] > ROC_P2P_SDMA_SIZE * Ki);
+             ((sizeIn[0] > ROC_P2P_SDMA_SIZE * Ki) || !gpu().IsPendingDispatch());
   bool asan = false;
 #if defined(__clang__)
 #if __has_feature(address_sanitizer)
diff --git a/rocclr/device/rocm/rocvirtual.hpp b/rocclr/device/rocm/rocvirtual.hpp
index 8a06cd5ac3..512f123bf5 100644
--- a/rocclr/device/rocm/rocvirtual.hpp
+++ b/rocclr/device/rocm/rocvirtual.hpp
@@ -382,6 +382,7 @@ class VirtualGPU : public device::VirtualDevice {
   void enableSyncBlit() const;

   void hasPendingDispatch() { hasPendingDispatch_ = true; }
+  bool IsPendingDispatch() const { return (hasPendingDispatch_) ? true : false; }
   void addSystemScope() { addSystemScope_ = true; }
   void SetCopyCommandType(cl_command_type type) { copy_command_type_ = type; }

diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp
index f22a3f5176..147349cc6b 100644
--- a/rocclr/utils/flags.hpp
+++ b/rocclr/utils/flags.hpp
@@ -267,7 +267,7 @@ release(bool, AMD_CPU_AFFINITY, false, \
 release(bool, ROC_USE_FGS_KERNARG, true, \
         "Use fine grain kernel args segment for supported asics") \
 release(uint, ROC_P2P_SDMA_SIZE, 1024, \
-        "The minimum size in MB for P2P transfer with SDMA") \
+        "The minimum size in KB for P2P transfer with SDMA") \
 release(uint, ROC_AQL_QUEUE_SIZE, 4096, \
         "AQL queue size in AQL packets") \
 release(bool, ROC_SKIP_KERNEL_ARG_COPY, false, \