SWDEV-373334 - Use copyMetadata for blit decisions

- Check the isAsync flag before taking the small host-copy path on large BAR, since that path synchronizes
- Use the copyEnginePreference hint (SDMA) from copyMetadata if HMM is supported.

Change-Id: I1ffc4b2604ed03cf5979cdc454178648c5ae5cba


[ROCm/clr commit: e0384f9f6b]
This commit is contained in:
Saleel Kudchadker
2022-12-14 14:29:16 -08:00
parent 8d648e4efe
commit ed4c04b150
+15 -4
View File
@@ -1583,9 +1583,14 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
bool result = false;
bool rejected = false;
// Use copyEnginePreference from copyMetadata when HMM is supported, since the top level may
// have more information about where the buffer resides
bool useCopyHint = (copyMetadata.copyEnginePreference_ == amd::CopyMetadata::SDMA) &&
dev().info().hmmSupported_;
// Fall into the ROC path for rejected transfers
if (dev().info().pcie_atomics_ && (setup_.disableCopyBufferRect_ ||
srcMemory.isHostMemDirectAccess() || dstMemory.isHostMemDirectAccess())) {
srcMemory.isHostMemDirectAccess() || dstMemory.isHostMemDirectAccess() || useCopyHint)) {
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire,
copyMetadata);
@@ -1708,7 +1713,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
amd::ScopedLock k(lockXferOps_);
bool result = false;
if (dev().info().largeBar_ && size[0] <= kMaxD2hMemcpySize) {
if (dev().info().largeBar_ && size[0] <= kMaxD2hMemcpySize && !copyMetadata.isAsync_) {
if ((srcMemory.owner()->getHostMem() == nullptr) &&
(srcMemory.owner()->getSvmPtr() != nullptr)) {
// CPU read ahead, hence release GPU memory and force barrier to make sure L2 flush
@@ -1831,7 +1836,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
amd::ScopedLock k(lockXferOps_);
bool result = false;
if (dev().info().largeBar_ && size[0] <= kMaxH2dMemcpySize) {
if (dev().info().largeBar_ && size[0] <= kMaxH2dMemcpySize && !copyMetadata.isAsync_) {
if ((dstMemory.owner()->getHostMem() == nullptr) &&
(dstMemory.owner()->getSvmPtr() != nullptr)) {
// CPU read ahead, hence release GPU memory
@@ -2186,9 +2191,15 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
asan = true;
#endif
#endif
// Use copyEnginePreference from copyMetadata when HMM is supported, since the top level may
// have more information about where the buffer resides
bool useCopyHint = (copyMetadata.copyEnginePreference_ == amd::CopyMetadata::SDMA) &&
dev().info().hmmSupported_;
if (setup_.disableHwlCopyBuffer_ ||
(!srcMemory.isHostMemDirectAccess() && !dstMemory.isHostMemDirectAccess() &&
!(p2p || asan) && !ipcShared)) {
!(p2p || asan) && !ipcShared && !useCopyHint)) {
uint blitType = BlitCopyBuffer;
size_t dim = 1;
size_t globalWorkOffset[3] = {0, 0, 0};