From 733c8d1d1c72af5ccfb4b8431ba7595189f667a4 Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras Date: Thu, 1 Dec 2022 15:55:39 +0000 Subject: [PATCH] SWDEV-369581 - Convey copy API metadata to ROCclr Change-Id: I569462d6d268700d419510255e201bf7d80d6714 [ROCm/clr commit: 72b45e2a1f945e2ef65b6eb6f52df51c819537e4] --- projects/clr/rocclr/device/blit.cpp | 29 +-- projects/clr/rocclr/device/blit.hpp | 88 ++++++--- projects/clr/rocclr/device/pal/palblit.cpp | 175 ++++++++++-------- projects/clr/rocclr/device/pal/palblit.hpp | 96 +++++++--- projects/clr/rocclr/device/pal/palvirtual.cpp | 49 ++--- projects/clr/rocclr/device/pal/palvirtual.hpp | 4 +- projects/clr/rocclr/device/rocm/rocblit.cpp | 166 ++++++++++------- projects/clr/rocclr/device/rocm/rocblit.hpp | 96 +++++++--- .../clr/rocclr/device/rocm/rocvirtual.cpp | 45 +++-- .../clr/rocclr/device/rocm/rocvirtual.hpp | 4 +- projects/clr/rocclr/platform/command.hpp | 68 +++++-- 11 files changed, 535 insertions(+), 285 deletions(-) diff --git a/projects/clr/rocclr/device/blit.cpp b/projects/clr/rocclr/device/blit.cpp index 376d1996d5..3c6b5dbf35 100644 --- a/projects/clr/rocclr/device/blit.cpp +++ b/projects/clr/rocclr/device/blit.cpp @@ -32,7 +32,7 @@ HostBlitManager::HostBlitManager(VirtualDevice& vDev, Setup setup) bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Map the device memory to CPU visible void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly); if (NULL == src) { @@ -52,7 +52,7 @@ bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, const amd::BufferRect& bufRect, const amd::BufferRect& hostRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Map source memory void* src = 
srcMemory.cpuMap(vDev_, Memory::CpuReadOnly); if (src == NULL) { @@ -82,7 +82,8 @@ bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { size_t startLayer = origin[2]; size_t numLayers = size[2]; if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { @@ -148,7 +149,7 @@ bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost, bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { uint flags = 0; if (entire) { flags = Memory::CpuWriteOnly; @@ -173,7 +174,7 @@ bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory, const amd::BufferRect& hostRect, const amd::BufferRect& bufRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Map destination memory void* dst = dstMemory.cpuMap(vDev_, (entire) ? 
Memory::CpuWriteOnly : 0); if (dst == NULL) { @@ -203,7 +204,8 @@ bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMe bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { uint flags = 0; if (entire) { flags = Memory::CpuWriteOnly; @@ -272,7 +274,8 @@ bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, // Overlap detection @@ -302,7 +305,8 @@ bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstM bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory, const amd::BufferRect& srcRect, const amd::BufferRect& dstRect, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, // Overlap detection @@ -340,7 +344,8 @@ bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - bool entire, size_t rowPitch, size_t slicePitch) const { + bool entire, size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { size_t startLayer = srcOrigin[2]; size_t numLayers = size[2]; if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { @@ -405,7 +410,8 
@@ bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memor bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - bool entire, size_t rowPitch, size_t slicePitch) const { + bool entire, size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { // Map source memory void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly); if (src == NULL) { @@ -469,7 +475,8 @@ bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memor bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { size_t startLayer = srcOrigin[2]; size_t numLayers = size[2]; if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { diff --git a/projects/clr/rocclr/device/blit.hpp b/projects/clr/rocclr/device/blit.hpp index 9c7d96f016..af2c333ae6 100644 --- a/projects/clr/rocclr/device/blit.hpp +++ b/projects/clr/rocclr/device/blit.hpp @@ -76,7 +76,9 @@ class BlitManager : public amd::HeapObject { void* dstHost, //!< Destination host memory const amd::Coord3D& origin, //!< Source origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const = 0; //! 
Copies a buffer object to system memory @@ -85,7 +87,9 @@ class BlitManager : public amd::HeapObject { const amd::BufferRect& bufRect, //!< Source rectangle const amd::BufferRect& hostRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Copies an image object to system memory @@ -95,7 +99,9 @@ class BlitManager : public amd::HeapObject { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const = 0; //! Copies system memory to a buffer object @@ -103,7 +109,9 @@ class BlitManager : public amd::HeapObject { Memory& dstMemory, //!< Destination memory object const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Copies system memory to a buffer object @@ -112,7 +120,9 @@ class BlitManager : public amd::HeapObject { const amd::BufferRect& hostRect, //!< Destination rectangle const amd::BufferRect& bufRect, //!< Source rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! 
Copies system memory to an image object @@ -122,7 +132,9 @@ class BlitManager : public amd::HeapObject { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Copies a buffer object to another buffer object @@ -131,7 +143,9 @@ class BlitManager : public amd::HeapObject { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Copies a buffer object to another buffer object @@ -140,7 +154,9 @@ class BlitManager : public amd::HeapObject { const amd::BufferRect& srcRect, //!< Source rectangle const amd::BufferRect& dstRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Copies an image object to a buffer object @@ -151,7 +167,9 @@ class BlitManager : public amd::HeapObject { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! 
Copies a buffer object to an image object @@ -162,7 +180,9 @@ class BlitManager : public amd::HeapObject { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Copies an image object to another image object @@ -171,7 +191,9 @@ class BlitManager : public amd::HeapObject { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const = 0; //! Fills a buffer memory with a pattern data @@ -248,7 +270,9 @@ class HostBlitManager : public device::BlitManager { void* dstHost, //!< Destination host memory const amd::Coord3D& origin, //!< Source origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to system memory @@ -257,7 +281,9 @@ class HostBlitManager : public device::BlitManager { const amd::BufferRect& bufRect, //!< Source rectangle const amd::BufferRect& hostRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies an image object to system memory @@ -267,7 +293,9 @@ class HostBlitManager : public device::BlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -275,7 +303,9 @@ class HostBlitManager : public device::BlitManager { device::Memory& dstMemory, //!< Destination memory object const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -284,7 +314,9 @@ class HostBlitManager : public device::BlitManager { const amd::BufferRect& hostRect, //!< Destination rectangle const amd::BufferRect& bufRect, //!< Source rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies system memory to an image object @@ -294,7 +326,9 @@ class HostBlitManager : public device::BlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies a buffer object to another buffer object @@ -303,7 +337,9 @@ class HostBlitManager : public device::BlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to another buffer object @@ -312,7 +348,9 @@ class HostBlitManager : public device::BlitManager { const amd::BufferRect& srcRect, //!< Source rectangle const amd::BufferRect& dstRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies an image object to a buffer object @@ -323,7 +361,9 @@ class HostBlitManager : public device::BlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to an image object @@ -334,7 +374,9 @@ class HostBlitManager : public device::BlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies an image object to another image object @@ -343,7 +385,9 @@ class HostBlitManager : public device::BlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Fills a buffer memory with a pattern data diff --git a/projects/clr/rocclr/device/pal/palblit.cpp b/projects/clr/rocclr/device/pal/palblit.cpp index 02516d9f0d..cf43d16ccf 100644 --- a/projects/clr/rocclr/device/pal/palblit.cpp +++ b/projects/clr/rocclr/device/pal/palblit.cpp @@ -117,12 +117,12 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory** bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Use host copy if memory has direct access if (setup_.disableReadBuffer_ || (gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) { gpu().releaseGpuMemoryFence(); - return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); } else { size_t srcSize = size[0]; size_t offset = 0; @@ -206,12 +206,14 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, const amd::BufferRect& bufRect, const amd::BufferRect& hostRect, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { // Use host copy if memory has direct access if (setup_.disableReadBufferRect_ || (gpuMem(srcMemory).isHostMemDirectAccess() && 
gpuMem(srcMemory).isCacheable())) { gpu().releaseGpuMemoryFence(); - return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire); + return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, + copyMetadata); } else { Memory& xferBuf = dev().xferRead().acquire(); @@ -259,16 +261,16 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, size_t rowPitch, size_t slicePitch, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { gpu().releaseGpuMemoryFence(); if (setup_.disableReadImage_) { return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } else { //! @todo Add HW accelerated path return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } return true; @@ -327,14 +329,14 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Use host copy if memory has direct access or it's persistent if (setup_.disableWriteBuffer_ || (gpuMem(dstMemory).isHostMemDirectAccess() && (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) || gpuMem(dstMemory).isPersistentDirectMap()) { gpu().releaseGpuMemoryFence(); - return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); } else { size_t dstSize = size[0]; size_t offset = 0; @@ -416,14 +418,15 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, bool 
DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory, const amd::BufferRect& hostRect, const amd::BufferRect& bufRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Use host copy if memory has direct access or it's persistent if (setup_.disableWriteBufferRect_ || (dstMemory.isHostMemDirectAccess() && (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) || gpuMem(dstMemory).isPersistentDirectMap()) { gpu().releaseGpuMemoryFence(); - return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire); + return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, + copyMetadata); } else { Memory& xferBuf = gpu().xferWrite().Acquire(std::min(gpu().xferWrite().MaxSize(), size[0])); @@ -473,15 +476,16 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { gpu().releaseGpuMemoryFence(); if (setup_.disableWriteImage_) { return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } else { //! 
@todo Add HW accelerated path return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } return true; @@ -489,12 +493,14 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { if (setup_.disableCopyBuffer_ || (gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() && !dev().settings().apuSystem_ && gpuMem(dstMemory).isHostMemDirectAccess())) { gpu().releaseGpuMemoryFence(); - return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size); + return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false, + copyMetadata); } else { return gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin, size, gpuMem(dstMemory)); } @@ -504,12 +510,14 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory, const amd::BufferRect& srcRect, const amd::BufferRect& dstRect, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { if (setup_.disableCopyBufferRect_ || (gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() && gpuMem(dstMemory).isHostMemDirectAccess())) { gpu().releaseGpuMemoryFence(); - return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire); + return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire, + copyMetadata); } else { size_t srcOffset; size_t dstOffset; @@ -580,12 +588,12 @@ bool DmaBlitManager::copyBufferRect(device::Memory& 
srcMemory, device::Memory& d bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, size_t rowPitch, - size_t slicePitch) const { + size_t slicePitch, amd::CopyMetadata copyMetadata) const { bool result = false; if (setup_.disableCopyImageToBuffer_) { gpu().releaseGpuMemoryFence(); result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } else { // Use PAL path for a transfer result = @@ -595,7 +603,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory if (completeOperation_ && !result) { gpu().releaseGpuMemoryFence(); result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } } @@ -605,12 +613,12 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, size_t rowPitch, - size_t slicePitch) const { + size_t slicePitch, amd::CopyMetadata copyMetadata) const { bool result = false; if (setup_.disableCopyBufferToImage_) { gpu().releaseGpuMemoryFence(); result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } else { // Use PAL path for a transfer result = @@ -620,7 +628,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory if (completeOperation_ && !result) { gpu().releaseGpuMemoryFence(); result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, 
slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } } @@ -629,16 +637,19 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { bool result = false; gpu().releaseGpuMemoryFence(); if (setup_.disableCopyImage_) { - return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire); + return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, + copyMetadata); } else { //! @todo Add HW accelerated path - return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire); + return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, + copyMetadata); } return result; @@ -785,7 +796,8 @@ const uint RejectedFormatChannelTotal = sizeof(RejectedOrder) / sizeof(FormatCon bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - bool entire, size_t rowPitch, size_t slicePitch) const { + bool entire, size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; static const bool CopyRect = false; @@ -796,7 +808,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem if (setup_.disableCopyBufferToImage_) { result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); synchronize(); return result; } @@ -808,7 +820,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem // but there are 
restriciton with older hardware if (dev().settings().imageDMA_) { result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); if (result) { synchronize(); return result; @@ -899,7 +911,8 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem // Step 2. Initiate compute transfer with all staging buffers for (uint i = 0; i < MaxXferBuffers; ++i) { if (copySize > 0) { - if (!copyBufferToImageKernel(*xferBuf[i], dstMemory, xferSrc, dst, xferRect, false)) { + if (!copyBufferToImageKernel(*xferBuf[i], dstMemory, xferSrc, dst, xferRect, false, + 0UL, 0UL, copyMetadata)) { transfer = false; break; } @@ -941,7 +954,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem if (!result) { result = copyBufferToImageKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, - rowPitch, slicePitch); + rowPitch, slicePitch, copyMetadata); } synchronize(); @@ -1045,7 +1058,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, - size_t rowPitch, size_t slicePitch) const { + size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { bool rejected = false; Memory* dstView = &gpuMem(dstMemory); bool releaseView = false; @@ -1084,7 +1098,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, // Fall into the host path if the image format was rejected if (rejected) { return HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire); + entire, 0UL, 0UL, copyMetadata); } // Use a common blit type with three dimensions by default @@ -1183,7 +1197,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& 
dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - bool entire, size_t rowPitch, size_t slicePitch) const { + bool entire, size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; static const bool CopyRect = false; @@ -1194,7 +1209,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem if (setup_.disableCopyImageToBuffer_) { result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); synchronize(); return result; } @@ -1209,7 +1224,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem if (dev().settings().imageDMA_ && gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical) { result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); if (result) { synchronize(); return result; @@ -1280,7 +1295,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem for (uint i = 0; i < MaxXferBuffers; ++i) { if (copySizeTmp > 0) { if (!copyImageToBufferKernel(srcMemory, *xferBuf[i], srcTmp, xferDst, xferRectTmp, - false)) { + false, 0UL, 0UL, copyMetadata)) { transfer = false; break; } @@ -1352,7 +1367,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem if (!result) { result = copyImageToBufferKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, - rowPitch, slicePitch); + rowPitch, slicePitch, copyMetadata); } synchronize(); @@ -1365,7 +1380,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, - size_t rowPitch, size_t slicePitch) const { + size_t rowPitch, size_t 
slicePitch, + amd::CopyMetadata copyMetadata) const { bool rejected = false; Memory* srcView = &gpuMem(srcMemory); bool releaseView = false; @@ -1404,7 +1420,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, // Fall into the host path if the image format was rejected if (rejected) { return HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire); + entire, 0UL, 0UL, copyMetadata); } uint blitType = BlitCopyImageToBuffer; @@ -1506,7 +1522,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool rejected = false; Memory* srcView = &gpuMem(srcMemory); @@ -1553,7 +1570,8 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst // Fall into the host path for the entire 2D copy or // if the image format was rejected if (rejected) { - result = HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire); + result = HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, + copyMetadata); synchronize(); return result; } @@ -1663,7 +1681,8 @@ void FindPinSize(size_t& pinSize, const amd::Coord3D& size, size_t& rowPitch, si bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1673,7 +1692,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, gpu().releaseGpuMemoryFence(); result = - 
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire); + HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire, + copyMetadata); synchronize(); return result; } else { @@ -1686,7 +1706,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, if (amdMemory == NULL) { // Force SW copy result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); synchronize(); return result; } @@ -1699,7 +1719,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, // Copy image to buffer result = copyImageToBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, rowPitch, - slicePitch); + slicePitch, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); @@ -1712,7 +1732,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1722,7 +1743,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor gpu().releaseGpuMemoryFence(); result = - HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire); + HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire, + copyMetadata); synchronize(); return result; } else { @@ -1738,7 +1760,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor if (amdMemory == nullptr) { // Force SW copy result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); synchronize(); return 
result; } @@ -1756,7 +1778,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor // Copy image to buffer result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch, - slicePitch); + slicePitch, copyMetadata); if (pinned) { // Add pinned memory for a later release @@ -1774,7 +1796,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory, const amd::BufferRect& srcRectIn, const amd::BufferRect& dstRectIn, const amd::Coord3D& sizeIn, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; bool rejected = false; @@ -1784,7 +1806,8 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory gpuMem(dstMemory).isHostMemDirectAccess()) { if (!dev().settings().disableSdma_) { result = - DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire); + DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire, + copyMetadata); } if (result) { synchronize(); @@ -1888,7 +1911,7 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1897,7 +1920,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, (gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) { gpu().releaseGpuMemoryFence(); - result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); synchronize(); return result; } else 
{ @@ -1909,7 +1932,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, if (amdMemory == NULL) { // Force SW copy - result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); synchronize(); return result; } @@ -1921,12 +1944,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, Memory* dstMemory = dev().getGpuMemory(amdMemory); // Copy image to buffer - result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire); + result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); } else { - result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); } } @@ -1938,7 +1961,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, const amd::BufferRect& bufRect, const amd::BufferRect& hostRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1947,7 +1970,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, (gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) { gpu().releaseGpuMemoryFence(); - result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire); + result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, + copyMetadata); synchronize(); return result; } else { @@ -1957,7 +1981,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, if (amdMemory == NULL) { // Force SW copy - result = 
HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire); + result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, + copyMetadata); synchronize(); return result; } @@ -1973,7 +1998,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, Memory* dstMemory = dev().getGpuMemory(amdMemory); // Copy image to buffer - result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire); + result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire, + copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); @@ -1986,7 +2012,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1997,7 +2023,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo (gpuMem(dstMemory).memoryType() == Resource::Persistent)) { gpu().releaseGpuMemoryFence(); - result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); synchronize(); return result; } else { @@ -2010,7 +2036,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo if (amdMemory == NULL) { // Force SW copy - result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); synchronize(); return result; } @@ -2022,12 +2048,12 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo Memory* srcMemory = dev().getGpuMemory(amdMemory); // Copy buffer rect - result = 
copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire); + result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); } else { - result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); } } @@ -2039,7 +2065,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory, const amd::BufferRect& hostRect, const amd::BufferRect& bufRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -2050,7 +2076,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst gpuMem(dstMemory).isPersistentDirectMap()) { gpu().releaseGpuMemoryFence(); - result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire); + result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, + copyMetadata); synchronize(); return result; } else { @@ -2061,7 +2088,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst if (amdMemory == NULL) { // Force SW copy result = - HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire); + HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, + copyMetadata); synchronize(); return result; } @@ -2080,7 +2108,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst rect.end_ = hostRect.end_; // Copy buffer rect - result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire); + result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire, + copyMetadata); // Add pinned memory for a 
later release gpu().addPinnedMem(amdMemory); @@ -2172,7 +2201,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& sizeIn, bool entire) const { + const amd::Coord3D& sizeIn, bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -2246,7 +2275,7 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds address parameters = kernels_[blitType]->parameters().values(); result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters); } else { - result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire); + result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire, copyMetadata); } synchronize(); diff --git a/projects/clr/rocclr/device/pal/palblit.hpp b/projects/clr/rocclr/device/pal/palblit.hpp index 260c9ca257..f54bb46e31 100644 --- a/projects/clr/rocclr/device/pal/palblit.hpp +++ b/projects/clr/rocclr/device/pal/palblit.hpp @@ -57,7 +57,9 @@ class DmaBlitManager : public device::HostBlitManager { void* dstHost, //!< Destination host memory const amd::Coord3D& origin, //!< Source origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Copies a buffer object to system memory @@ -66,7 +68,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::BufferRect& bufRect, //!< Source rectangle const amd::BufferRect& hostRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to system memory @@ -76,7 +80,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -84,7 +90,9 @@ class DmaBlitManager : public device::HostBlitManager { device::Memory& dstMemory, //!< Destination memory object const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -93,7 +101,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::BufferRect& hostRect, //!< Destination rectangle const amd::BufferRect& bufRect, //!< Source rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Copies system memory to an image object @@ -103,7 +113,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies a buffer object to another buffer object @@ -112,7 +124,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies a buffer object to another buffer object @@ -121,7 +135,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::BufferRect& srcRect, //!< Source rectangle const amd::BufferRect& dstRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to a buffer object @@ -132,7 +148,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Copies a buffer object to an image object @@ -143,7 +161,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to another image object @@ -152,7 +172,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Stream memory write operation - Write a 'value' at 'memory'. @@ -269,7 +291,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::BufferRect& srcRectIn, //!< Source rectangle const amd::BufferRect& dstRectIn, //!< Destination rectangle const amd::Coord3D& sizeIn, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to system memory @@ -277,7 +301,9 @@ class KernelBlitManager : public DmaBlitManager { void* dstHost, //!< Destination host memory const amd::Coord3D& origin, //!< Source origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Copies a buffer object to system memory @@ -286,7 +312,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::BufferRect& bufRect, //!< Source rectangle const amd::BufferRect& hostRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -294,7 +322,9 @@ class KernelBlitManager : public DmaBlitManager { device::Memory& dstMemory, //!< Destination memory object const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -303,7 +333,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::BufferRect& hostRect, //!< Destination rectangle const amd::BufferRect& bufRect, //!< Source rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to an image object @@ -312,7 +344,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies a buffer object to an image object @@ -323,7 +357,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to a buffer object @@ -334,7 +370,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to another image object @@ -343,7 +381,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to system memory @@ -353,7 +393,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Copies system memory to an image object @@ -363,7 +405,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Fills a buffer memory with a pattern data @@ -433,7 +477,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies an image object to a buffer object @@ -444,7 +490,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Creates a program for all blit operations diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 9c917681d1..c892a48732 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -1200,7 +1200,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) { // Accelerated transfer without pinning amd::Coord3D dstOrigin(offset); result = blitMgr().copyBuffer(*memory, *hostMemory, origin, dstOrigin, size, - vcmd.isEntireMemory()); + vcmd.isEntireMemory(), vcmd.copyMetadata()); } else { // The logic below will perform 2 step copy to make sure memory pinning doesn't // occur on the first unaligned page, because in Windows memory manager can @@ -1218,11 +1218,11 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) { } // Make first step transfer if (partial > 0) { - result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial); + result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial, false, vcmd.copyMetadata()); } // Second step transfer if something left to copy if (partial < size[0]) { - result &= blitMgr().readBuffer(*memory, tmpHost, origin[0] + partial, size[0] - partial); + result &= blitMgr().readBuffer(*memory, tmpHost, origin[0] + partial, size[0] - partial, false, vcmd.copyMetadata()); } } if (nullptr != bufferFromImage) { @@ -1237,10 +1237,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) { vcmd.hostRect().slicePitch_); if (hostMemory != nullptr) { result = blitMgr().copyBufferRect(*memory, *hostMemory, vcmd.bufRect(), hostbufferRect, - vcmd.size(), vcmd.isEntireMemory()); + vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata()); } else { result = blitMgr().readBufferRect(*memory, vcmd.destination(), vcmd.bufRect(), - vcmd.hostRect(), vcmd.size(), vcmd.isEntireMemory()); + vcmd.hostRect(), vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata()); } } break; case 
CL_COMMAND_READ_IMAGE: @@ -1255,7 +1255,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) { // Copy memory from the original image buffer into the backing store image result = blitMgr().copyBufferToImage(*buffer, *imageBuffer->CopyImageBuffer(), offs, offs, image->getRegion(), true, - image->getRowPitch(), image->getSlicePitch()); + image->getRowPitch(), image->getSlicePitch(), vcmd.copyMetadata()); } } if (hostMemory != nullptr) { @@ -1263,10 +1263,12 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) { amd::Coord3D dstOrigin(offset); result = blitMgr().copyImageToBuffer(*memory, *hostMemory, vcmd.origin(), dstOrigin, vcmd.size(), - vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch()); + vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch(), + vcmd.copyMetadata()); } else { result = blitMgr().readImage(*memory, vcmd.destination(), vcmd.origin(), vcmd.size(), - vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory()); + vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory(), + vcmd.copyMetadata()); } break; default: @@ -1331,7 +1333,7 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) { // Accelerated transfer without pinning amd::Coord3D srcOrigin(offset); result = blitMgr().copyBuffer(*hostMemory, *memory, srcOrigin, origin, size, - vcmd.isEntireMemory()); + vcmd.isEntireMemory(), vcmd.copyMetadata()); } else { // The logic below will perform 2 step copy to make sure memory pinning doesn't // occur on the first unaligned page, because in Windows memory manager can @@ -1349,11 +1351,11 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) { } // Make first step transfer if (partial > 0) { - result = blitMgr().writeBuffer(vcmd.source(), *memory, origin, partial); + result = blitMgr().writeBuffer(vcmd.source(), *memory, origin, partial, false, vcmd.copyMetadata()); } // Second step transfer if something left to copy if (partial < size[0]) { - result &= blitMgr().writeBuffer(tmpHost, 
*memory, origin[0] + partial, size[0] - partial); + result &= blitMgr().writeBuffer(tmpHost, *memory, origin[0] + partial, size[0] - partial, false, vcmd.copyMetadata()); } } if (nullptr != bufferFromImage) { @@ -1368,10 +1370,10 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) { vcmd.hostRect().slicePitch_); if (hostMemory != nullptr) { result = blitMgr().copyBufferRect(*hostMemory, *memory, hostbufferRect, vcmd.bufRect(), - vcmd.size(), vcmd.isEntireMemory()); + vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata()); } else { result = blitMgr().writeBufferRect(vcmd.source(), *memory, vcmd.hostRect(), vcmd.bufRect(), - vcmd.size(), vcmd.isEntireMemory()); + vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata()); } } break; case CL_COMMAND_WRITE_IMAGE: @@ -1380,10 +1382,12 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) { amd::Coord3D srcOrigin(offset); result = blitMgr().copyBufferToImage(*hostMemory, *memory, srcOrigin, vcmd.origin(), vcmd.size(), - vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch()); + vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch(), + vcmd.copyMetadata()); } else { result = blitMgr().writeImage(vcmd.source(), *memory, vcmd.origin(), vcmd.size(), - vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory()); + vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory(), + vcmd.copyMetadata()); } break; default: @@ -1404,7 +1408,8 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) { bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memory& dstMem, bool entire, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - const amd::BufferRect& srcRect, const amd::BufferRect& dstRect) { + const amd::BufferRect& srcRect, const amd::BufferRect& dstRect, + amd::CopyMetadata copyMetadata) { // Translate memory references and ensure cache up-to-date pal::Memory* dstMemory = dev().getGpuMemory(&dstMem); pal::Memory* 
srcMemory = dev().getGpuMemory(&srcMem); @@ -1464,7 +1469,7 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo } result = blitMgr().copyBuffer(*srcMemory, *dstMemory, realSrcOrigin, realDstOrigin, realSize, - entire); + entire, copyMetadata); if (nullptr != bufferFromImageSrc) { bufferFromImageSrc->release(); @@ -1474,18 +1479,18 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo } } break; case CL_COMMAND_COPY_BUFFER_RECT: - result = blitMgr().copyBufferRect(*srcMemory, *dstMemory, srcRect, dstRect, size, entire); + result = blitMgr().copyBufferRect(*srcMemory, *dstMemory, srcRect, dstRect, size, entire, copyMetadata); break; case CL_COMMAND_COPY_IMAGE_TO_BUFFER: result = - blitMgr().copyImageToBuffer(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire); + blitMgr().copyImageToBuffer(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, 0UL, 0UL, copyMetadata); break; case CL_COMMAND_COPY_BUFFER_TO_IMAGE: result = - blitMgr().copyBufferToImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire); + blitMgr().copyBufferToImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, 0UL, 0UL, copyMetadata); break; case CL_COMMAND_COPY_IMAGE: - result = blitMgr().copyImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire); + result = blitMgr().copyImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, copyMetadata); break; default: LogError("Unsupported command type for memory copy!"); @@ -1512,7 +1517,7 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand& vcmd) { bool entire = vcmd.isEntireMemory(); if (!copyMemory(type, vcmd.source(), vcmd.destination(), entire, vcmd.srcOrigin(), - vcmd.dstOrigin(), vcmd.size(), vcmd.srcRect(), vcmd.dstRect())) { + vcmd.dstOrigin(), vcmd.size(), vcmd.srcRect(), vcmd.dstRect(), vcmd.copyMetadata())) { vcmd.setStatus(CL_INVALID_OPERATION); } diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp 
b/projects/clr/rocclr/device/pal/palvirtual.hpp index bacc2b400d..e5d687daee 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -630,7 +630,9 @@ class VirtualGPU : public device::VirtualDevice { const amd::Coord3D& dstOrigin, //!< destination memory object const amd::Coord3D& size, //!< copy size const amd::BufferRect& srcRect, //!< region of source for copy - const amd::BufferRect& dstRect //!< region of destination for copy + const amd::BufferRect& dstRect, //!< region of destination for copy + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ); void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index bf94cb2527..864a3a9ab2 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -61,7 +61,7 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory& bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation gpu().releaseGpuMemoryFence(kSkipCpuWait); @@ -70,7 +70,7 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().Barriers().WaitCurrent(); - return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); } else { size_t srcSize = size[0]; size_t offset = 0; @@ -150,7 +150,8 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, 
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation gpu().releaseGpuMemoryFence(); @@ -159,7 +160,7 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().Barriers().WaitCurrent(); - return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire); + return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, copyMetadata); } else { Memory& xferBuf = dev().xferRead().acquire(); address staging = xferBuf.getDeviceMemory(); @@ -190,17 +191,17 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, size_t rowPitch, size_t slicePitch, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation gpu().releaseGpuMemoryFence(); if (setup_.disableReadImage_) { return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } else { //! 
@todo Add HW accelerated path return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } return true; @@ -222,13 +223,13 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // Use host copy if memory has direct access if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() || gpuMem(dstMemory).IsPersistentDirectMap()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); } else { // HSA copy functionality with a possible async operation gpu().releaseGpuMemoryFence(kSkipCpuWait); @@ -314,14 +315,14 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory, const amd::BufferRect& hostRect, const amd::BufferRect& bufRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation gpu().releaseGpuMemoryFence(); // Use host copy if memory has direct access if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() || gpuMem(dstMemory).IsPersistentDirectMap()) { - return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire); + return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, copyMetadata); } else { Memory& xferBuf = dev().xferWrite().acquire(); address staging = xferBuf.getDeviceMemory(); @@ -352,17 +353,18 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, 
device::Memory& dstMem bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation gpu().releaseGpuMemoryFence(); if (setup_.disableWriteImage_) { return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } else { //! @todo Add HW accelerated path return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); } return true; @@ -370,13 +372,13 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, amd::CopyMetadata copyMetadata) const { if (setup_.disableCopyBuffer_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && (dev().agent_profile() != HSA_PROFILE_FULL) && dstMemory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size); + return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false, copyMetadata); } else { return hsaCopy(gpuMem(srcMemory), gpuMem(dstMemory), srcOrigin, dstOrigin, size); } @@ -387,13 +389,14 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe // ================================================================================================ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory, const amd::BufferRect& srcRect, const 
amd::BufferRect& dstRect, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { if (setup_.disableCopyBufferRect_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && dstMemory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire); + return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire, copyMetadata); } else { gpu().releaseGpuMemoryFence(kSkipCpuWait); @@ -500,7 +503,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, size_t rowPitch, - size_t slicePitch) const { + size_t slicePitch, amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation, hence make sure GPU is done gpu().releaseGpuMemoryFence(); @@ -508,7 +511,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory if (setup_.disableCopyImageToBuffer_) { result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } else { Image& srcImage = static_cast(srcMemory); Buffer& dstBuffer = static_cast(dstMemory); @@ -534,7 +537,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory // Check if a HostBlit transfer is required if (completeOperation_ && !result) { result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } } @@ -544,7 +547,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, 
device::Memory bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, size_t rowPitch, - size_t slicePitch) const { + size_t slicePitch, amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation, hence make sure GPU is done gpu().releaseGpuMemoryFence(); @@ -552,7 +555,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory if (setup_.disableCopyBufferToImage_) { result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } else { Buffer& srcBuffer = static_cast(srcMemory); Image& dstImage = static_cast(dstMemory); @@ -579,7 +582,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory // Check if a HostBlit tran sfer is required if (completeOperation_ && !result) { result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } } @@ -588,17 +591,20 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { // HSA copy functionality with a possible async operation, hence make sure GPU is done gpu().releaseGpuMemoryFence(); bool result = false; if (setup_.disableCopyImage_) { - return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire); + return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, + copyMetadata); } else { //! 
@todo Add HW accelerated path - return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire); + return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, + copyMetadata); } return result; @@ -878,7 +884,8 @@ const uint RejectedFormatChannelTotal = sizeof(RejectedOrder) / sizeof(FormatCon bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - bool entire, size_t rowPitch, size_t slicePitch) const { + bool entire, size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -890,7 +897,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem if (setup_.disableCopyBufferToImage_) { result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); synchronize(); return result; } @@ -902,7 +909,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem // but there are restriciton with older hardware if (dev().settings().imageDMA_) { result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); if (result) { synchronize(); return result; @@ -912,7 +919,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem if (!result) { result = copyBufferToImageKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, - rowPitch, slicePitch); + rowPitch, slicePitch, copyMetadata); } synchronize(); @@ -949,7 +956,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, 
bool entire, - size_t rowPitch, size_t slicePitch) const { + size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -995,7 +1003,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, // Fall into the host path if the image format was rejected if (rejected) { return DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } // Use a common blit type with three dimensions by default @@ -1096,7 +1104,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - bool entire, size_t rowPitch, size_t slicePitch) const { + bool entire, size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -1108,7 +1117,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem if (setup_.disableCopyImageToBuffer_) { result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); synchronize(); return result; } @@ -1122,7 +1131,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem // single step SDMA is causing corruption and the cause is under investigation if (dev().settings().imageDMA_) { result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); if (result) { synchronize(); return result; @@ -1132,7 +1141,7 @@ bool 
KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem if (!result) { result = copyImageToBufferKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, - rowPitch, slicePitch); + rowPitch, slicePitch, copyMetadata); } synchronize(); @@ -1145,7 +1154,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, bool entire, - size_t rowPitch, size_t slicePitch) const { + size_t rowPitch, size_t slicePitch, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -1190,7 +1200,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, // Fall into the host path if the image format was rejected if (rejected) { return DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, - entire, rowPitch, slicePitch); + entire, rowPitch, slicePitch, copyMetadata); } uint blitType = BlitCopyImageToBuffer; @@ -1296,7 +1306,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& size, bool entire) const { + const amd::Coord3D& size, bool entire, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -1348,7 +1359,8 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst // Fall into the host path for the entire 2D copy or // if the image format was rejected if (rejected) { - result = DmaBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire); + result = DmaBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire, + copyMetadata); synchronize(); return result; } @@ -1460,7 +1472,8 @@ 
void FindPinSize(size_t& pinSize, const amd::Coord3D& size, size_t& rowPitch, si bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -1471,7 +1484,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, if (setup_.disableReadImage_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire); + result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire, + copyMetadata); synchronize(); return result; } else { @@ -1484,7 +1498,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, if (amdMemory == nullptr) { // Force SW copy result = - DmaBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire); + DmaBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire, + copyMetadata); synchronize(); return result; } @@ -1497,7 +1512,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, // Copy image to buffer result = copyImageToBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, rowPitch, - slicePitch); + slicePitch, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); @@ -1510,7 +1525,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost, bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - size_t rowPitch, size_t slicePitch, bool entire) const { + size_t 
rowPitch, size_t slicePitch, bool entire, + amd::CopyMetadata copyMetadata) const { guarantee((dev().info().imageSupport_ != false), "Image not supported on this device"); @@ -1521,7 +1537,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor if (setup_.disableWriteImage_ || dstMemory.isHostMemDirectAccess()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire); + result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire, + copyMetadata); synchronize(); return result; } else { @@ -1534,7 +1551,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor if (amdMemory == nullptr) { // Force SW copy result = DmaBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, - entire); + entire, copyMetadata); synchronize(); return result; } @@ -1547,7 +1564,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor // Copy image to buffer result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch, - slicePitch); + slicePitch, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); @@ -1561,7 +1578,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory, const amd::BufferRect& srcRectIn, const amd::BufferRect& dstRectIn, const amd::Coord3D& sizeIn, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; bool rejected = false; @@ -1569,7 +1586,8 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory // Fall into the ROC path for rejected transfers if (dev().info().pcie_atomics_ && (setup_.disableCopyBufferRect_ || 
srcMemory.isHostMemDirectAccess() || dstMemory.isHostMemDirectAccess())) { - result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire); + result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire, + copyMetadata); if (result) { synchronize(); @@ -1686,7 +1704,7 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory // ================================================================================================ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1716,7 +1734,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, if (setup_.disableReadBuffer_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); synchronize(); return result; } else { @@ -1728,7 +1746,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, if (amdMemory == nullptr) { // Force SW copy - result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); synchronize(); return result; } @@ -1740,12 +1758,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, Memory* dstMemory = dev().getRocMemory(amdMemory); // Copy image to buffer - result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire); + result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, copyMetadata); // Add pinned memory for a 
later release gpu().addPinnedMem(amdMemory); } else { - result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire); + result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); } } @@ -1758,7 +1776,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, const amd::BufferRect& bufRect, const amd::BufferRect& hostRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1767,7 +1785,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire); + result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, + copyMetadata); synchronize(); return result; } else { @@ -1777,7 +1796,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, if (amdMemory == nullptr) { // Force SW copy - result = DmaBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire); + result = DmaBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, + copyMetadata); synchronize(); return result; } @@ -1793,7 +1813,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, Memory* dstMemory = dev().getRocMemory(amdMemory); // Copy image to buffer - result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire); + result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); @@ -1807,7 +1827,7 @@ bool 
KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, // ================================================================================================ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1832,7 +1852,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo gpuMem(dstMemory).IsPersistentDirectMap()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); synchronize(); return result; } else { @@ -1845,7 +1865,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo if (amdMemory == nullptr) { // Force SW copy - result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); synchronize(); return result; } @@ -1857,12 +1877,12 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo Memory* srcMemory = dev().getRocMemory(amdMemory); // Copy buffer rect - result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire); + result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); } else { - result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire); + result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); } } @@ -1875,7 +1895,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo bool KernelBlitManager::writeBufferRect(const void* 
srcHost, device::Memory& dstMemory, const amd::BufferRect& hostRect, const amd::BufferRect& bufRect, const amd::Coord3D& size, - bool entire) const { + bool entire, amd::CopyMetadata copyMetadata) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1884,7 +1904,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst gpuMem(dstMemory).IsPersistentDirectMap()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire); + result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, + copyMetadata); synchronize(); return result; } else { @@ -1894,7 +1915,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst if (amdMemory == nullptr) { // Force DMA copy with staging - result = DmaBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire); + result = DmaBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, + copyMetadata); synchronize(); return result; } @@ -1913,7 +1935,7 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst rect.end_ = hostRect.end_; // Copy buffer rect - result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire); + result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire, copyMetadata); // Add pinned memory for a later release gpu().addPinnedMem(amdMemory); @@ -2151,7 +2173,8 @@ bool KernelBlitManager::fillBuffer3D(device::Memory& memory, const void* pattern // ================================================================================================ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, - const amd::Coord3D& sizeIn, bool entire) const { + const amd::Coord3D& sizeIn, bool entire, + amd::CopyMetadata copyMetadata) 
const { amd::ScopedLock k(lockXferOps_); bool result = false; bool p2p = (&gpuMem(srcMemory).dev() != &gpuMem(dstMemory).dev()) && @@ -2247,7 +2270,8 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds gpu().SetCopyCommandType(CL_COMMAND_READ_BUFFER); } } - result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire); + result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire, + copyMetadata); } synchronize(); diff --git a/projects/clr/rocclr/device/rocm/rocblit.hpp b/projects/clr/rocclr/device/rocm/rocblit.hpp index bdeef4a040..12dc88b6ee 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.hpp +++ b/projects/clr/rocclr/device/rocm/rocblit.hpp @@ -59,7 +59,9 @@ class DmaBlitManager : public device::HostBlitManager { void* dstHost, //!< Destination host memory const amd::Coord3D& origin, //!< Source origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies a buffer object to system memory @@ -68,7 +70,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::BufferRect& bufRect, //!< Source rectangle const amd::BufferRect& hostRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies an image object to system memory @@ -78,7 +82,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -86,7 +92,9 @@ class DmaBlitManager : public device::HostBlitManager { device::Memory& dstMemory, //!< Destination memory object const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -95,7 +103,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::BufferRect& hostRect, //!< Destination rectangle const amd::BufferRect& bufRect, //!< Source rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies system memory to an image object @@ -105,7 +115,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Copies a buffer object to another buffer object @@ -114,7 +126,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to another buffer object @@ -123,7 +137,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::BufferRect& srcRect, //!< Source rectangle const amd::BufferRect& dstRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies an image object to a buffer object @@ -134,7 +150,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to an image object @@ -145,7 +163,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies an image object to another image object @@ -154,7 +174,9 @@ class DmaBlitManager : public device::HostBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Stream memory write operation - Write a 'value' at 'memory'. @@ -290,7 +312,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::BufferRect& srcRectIn, //!< Source rectangle const amd::BufferRect& dstRectIn, //!< Destination rectangle const amd::Coord3D& sizeIn, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to system memory @@ -298,7 +322,9 @@ class KernelBlitManager : public DmaBlitManager { void* dstHost, //!< Destination host memory const amd::Coord3D& origin, //!< Source origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies a buffer object to system memory @@ -307,7 +333,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::BufferRect& bufRect, //!< Source rectangle const amd::BufferRect& hostRect, //!< Destination rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies system memory to a buffer object @@ -315,7 +343,9 @@ class KernelBlitManager : public DmaBlitManager { device::Memory& dstMemory, //!< Destination memory object const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies system memory to a buffer object @@ -324,7 +354,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::BufferRect& hostRect, //!< Destination rectangle const amd::BufferRect& bufRect, //!< Source rectangle const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to an image object @@ -333,7 +365,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies a buffer object to an image object @@ -344,7 +378,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! 
Copies an image object to a buffer object @@ -355,7 +391,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies an image object to another image object @@ -364,7 +402,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& srcOrigin, //!< Source origin const amd::Coord3D& dstOrigin, //!< Destination origin const amd::Coord3D& size, //!< Size of the copy region - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies an image object to system memory @@ -374,7 +414,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! Copies system memory to an image object @@ -384,7 +426,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region size_t rowPitch, //!< Row pitch for host memory size_t slicePitch, //!< Slice pitch for host memory - bool entire = false //!< Entire buffer will be updated + bool entire = false, //!< Entire buffer will be updated + amd::CopyMetadata copyMetadata = + amd::CopyMetadata()//!< Memory copy MetaData ) const; //! 
Fills a buffer memory with a pattern data @@ -481,7 +525,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Copies an image object to a buffer object @@ -492,7 +538,9 @@ class KernelBlitManager : public DmaBlitManager { const amd::Coord3D& size, //!< Size of the copy region bool entire = false, //!< Entire buffer will be updated size_t rowPitch = 0, //!< Pitch for buffer - size_t slicePitch = 0 //!< Slice for buffer + size_t slicePitch = 0, //!< Slice for buffer + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ) const; //! Creates a program for all blit operations diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 302b4b23d3..3cb379e883 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -1534,9 +1534,9 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) { // Accelerated transfer without pinning amd::Coord3D dstOrigin(offset); result = blitMgr().copyBuffer(*devMem, *hostMemory, origin, dstOrigin, size, - cmd.isEntireMemory()); + cmd.isEntireMemory(), cmd.copyMetadata()); } else { - result = blitMgr().readBuffer(*devMem, dst, origin, size, cmd.isEntireMemory()); + result = blitMgr().readBuffer(*devMem, dst, origin, size, cmd.isEntireMemory(), cmd.copyMetadata()); } break; } @@ -1548,10 +1548,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) { cmd.hostRect().slicePitch_); if (hostMemory != nullptr) { result = blitMgr().copyBufferRect(*devMem, *hostMemory, cmd.bufRect(), hostbufferRect, - size, cmd.isEntireMemory()); + size, 
cmd.isEntireMemory(), cmd.copyMetadata()); } else { result = blitMgr().readBufferRect(*devMem, dst, cmd.bufRect(), cmd.hostRect(), size, - cmd.isEntireMemory()); + cmd.isEntireMemory(), cmd.copyMetadata()); } break; } @@ -1576,10 +1576,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) { amd::Coord3D dstOrigin(offset); result = blitMgr().copyImageToBuffer(*devMem, *hostMemory, cmd.origin(), dstOrigin, size, - cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch()); + cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch(), cmd.copyMetadata()); } else { result = blitMgr().readImage(*devMem, dst, cmd.origin(), size, cmd.rowPitch(), - cmd.slicePitch(), cmd.isEntireMemory()); + cmd.slicePitch(), cmd.isEntireMemory(), cmd.copyMetadata()); } break; } @@ -1641,9 +1641,9 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) { // Accelerated transfer without pinning amd::Coord3D srcOrigin(offset); result = blitMgr().copyBuffer(*hostMemory, *devMem, srcOrigin, origin, size, - cmd.isEntireMemory()); + cmd.isEntireMemory(), cmd.copyMetadata()); } else { - result = blitMgr().writeBuffer(src, *devMem, origin, size, cmd.isEntireMemory()); + result = blitMgr().writeBuffer(src, *devMem, origin, size, cmd.isEntireMemory(), cmd.copyMetadata()); } break; } @@ -1655,10 +1655,10 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) { cmd.hostRect().slicePitch_); if (hostMemory != nullptr) { result = blitMgr().copyBufferRect(*hostMemory, *devMem, hostbufferRect, cmd.bufRect(), - size, cmd.isEntireMemory()); + size, cmd.isEntireMemory(), cmd.copyMetadata()); } else { result = blitMgr().writeBufferRect(src, *devMem, cmd.hostRect(), cmd.bufRect(), size, - cmd.isEntireMemory()); + cmd.isEntireMemory(), cmd.copyMetadata()); } break; } @@ -1668,10 +1668,11 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) { amd::Coord3D srcOrigin(offset); result = blitMgr().copyBufferToImage(*hostMemory, *devMem, srcOrigin, cmd.origin(), size, - 
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch()); + cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch(), + cmd.copyMetadata()); } else { result = blitMgr().writeImage(src, *devMem, cmd.origin(), size, cmd.rowPitch(), - cmd.slicePitch(), cmd.isEntireMemory()); + cmd.slicePitch(), cmd.isEntireMemory(), cmd.copyMetadata()); } break; } @@ -1752,7 +1753,8 @@ void VirtualGPU::submitSvmPrefetchAsync(amd::SvmPrefetchAsyncCommand& cmd) { bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memory& dstMem, bool entire, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, const amd::Coord3D& size, - const amd::BufferRect& srcRect, const amd::BufferRect& dstRect) { + const amd::BufferRect& srcRect, const amd::BufferRect& dstRect, + amd::CopyMetadata copyMetadata) { Memory* srcDevMem = dev().getRocMemory(&srcMem); Memory* dstDevMem = dev().getRocMemory(&dstMem); @@ -1796,23 +1798,28 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo realSize.c[0] *= elemSize; } - result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, realSrcOrigin, realDstOrigin, realSize, entire); + result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, realSrcOrigin, realDstOrigin, + realSize, entire, copyMetadata); break; } case CL_COMMAND_COPY_BUFFER_RECT: { - result = blitMgr().copyBufferRect(*srcDevMem, *dstDevMem, srcRect, dstRect, size, entire); + result = blitMgr().copyBufferRect(*srcDevMem, *dstDevMem, srcRect, dstRect, size, entire, + copyMetadata); break; } case CL_COMMAND_COPY_IMAGE: { - result = blitMgr().copyImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire); + result = blitMgr().copyImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire, + copyMetadata); break; } case CL_COMMAND_COPY_IMAGE_TO_BUFFER: { - result = blitMgr().copyImageToBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire); + result = blitMgr().copyImageToBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire, + 
0UL, 0UL, copyMetadata); break; } case CL_COMMAND_COPY_BUFFER_TO_IMAGE: { - result = blitMgr().copyBufferToImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire); + result = blitMgr().copyBufferToImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire, + 0UL, 0UL, copyMetadata); break; } default: @@ -1841,7 +1848,7 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand& cmd) { bool entire = cmd.isEntireMemory(); if (!copyMemory(type, cmd.source(), cmd.destination(), entire, cmd.srcOrigin(), - cmd.dstOrigin(), cmd.size(), cmd.srcRect(), cmd.dstRect())) { + cmd.dstOrigin(), cmd.size(), cmd.srcRect(), cmd.dstRect(), cmd.copyMetadata())) { cmd.setStatus(CL_INVALID_OPERATION); } diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 124cdf4eef..8bf88c0818 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -470,7 +470,9 @@ class VirtualGPU : public device::VirtualDevice { const amd::Coord3D& dstOrigin, //!< destination memory object const amd::Coord3D& size, //!< copy size const amd::BufferRect& srcRect, //!< region of source for copy - const amd::BufferRect& dstRect //!< region of destination for copy + const amd::BufferRect& dstRect, //!< region of destination for copy + amd::CopyMetadata copyMetadata = + amd::CopyMetadata() //!< Memory copy MetaData ); //! 
//! \brief Copy API metadata that is carried by the memory commands and handed
//! to the blit manager copy/read/write entry points.
//!
//! The same 32-bit storage can be accessed either through the individual
//! bit-fields or through the raw \c flags_ word.
union CopyMetadata {
  //! Copy engine preference values stored in copyEnginePreference_
  enum CopyEnginePreference {
    NONE = 0,
    BLIT = 1,
    SDMA = 2,
    CPDMA = 3
  };

  struct {
    uint32_t isAsync_ : 1;               //!< Value of the isAsync constructor argument
    uint32_t copyEnginePreference_ : 2;  //!< One of CopyEnginePreference
  };
  uint32_t flags_;  //!< Raw view of all metadata bits

  //! Constructs metadata with every bit cleared
  CopyMetadata() : flags_(0) {}

  //! Constructs metadata from explicit values.
  //!
  //! The whole word is cleared first so that the bits not covered by the
  //! bit-fields are well defined when the object is read through flags_.
  CopyMetadata(bool isAsync, CopyEnginePreference copyEnginePreference) : flags_(0) {
    isAsync_ = isAsync;
    copyEnginePreference_ = copyEnginePreference;
  }
};
Construct a new ReadMemoryCommand ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, void* hostPtr, - const BufferRect& bufRect, const BufferRect& hostRect) + const BufferRect& bufRect, const BufferRect& hostRect, + amd::CopyMetadata copyMetadata = amd::CopyMetadata()) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), @@ -491,7 +513,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand { rowPitch_(0), slicePitch_(0), bufRect_(bufRect), - hostRect_(hostRect) { + hostRect_(hostRect), + copyMetadata_(copyMetadata) { // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); @@ -517,7 +540,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand { const BufferRect& bufRect() const { return bufRect_; } //! Return the host rectangle information const BufferRect& hostRect() const { return hostRect_; } - + //! Return the copy MetaData + amd::CopyMetadata copyMetadata() const { return copyMetadata_; } //! Updates the host memory to read from void setSource(Memory& memory) { memory_ = &memory; } //! 
Updates the host memory to write to @@ -580,17 +604,20 @@ class WriteMemoryCommand : public OneMemoryArgCommand { BufferRect bufRect_; //!< Buffer rectangle information BufferRect hostRect_; //!< Host memory rectangle information + amd::CopyMetadata copyMetadata_; public: WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr, - size_t rowPitch = 0, size_t slicePitch = 0) + size_t rowPitch = 0, size_t slicePitch = 0, + amd::CopyMetadata copyMetadata = amd::CopyMetadata()) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), hostPtr_(hostPtr), rowPitch_(rowPitch), - slicePitch_(slicePitch) { + slicePitch_(slicePitch), + copyMetadata_(copyMetadata){ // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); @@ -598,7 +625,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand { WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr, - const BufferRect& bufRect, const BufferRect& hostRect) + const BufferRect& bufRect, const BufferRect& hostRect, + amd::CopyMetadata copyMetadata = amd::CopyMetadata()) : OneMemoryArgCommand(queue, cmdType, eventWaitList, memory), origin_(origin), size_(size), @@ -606,7 +634,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand { rowPitch_(0), slicePitch_(0), bufRect_(bufRect), - hostRect_(hostRect) { + hostRect_(hostRect), + copyMetadata_(copyMetadata){ // Sanity checks assert(hostPtr != NULL && "hostPtr cannot be null"); assert(size.c[0] > 0 && "invalid"); @@ -632,7 +661,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand { const BufferRect& bufRect() const { return bufRect_; } //! Return the host rectangle information const BufferRect& hostRect() const { return hostRect_; } - + //! 
Return the copy MetaData + amd::CopyMetadata copyMetadata() const { return copyMetadata_; } //! Updates the host memory to read from void setSource(const void* hostPtr) { hostPtr_ = hostPtr; } //! Updates the host memory to write to @@ -831,28 +861,31 @@ class CopyMemoryCommand : public TwoMemoryArgsCommand { BufferRect srcRect_; //!< Source buffer rectangle information BufferRect dstRect_; //!< Destination buffer rectangle information - + amd::CopyMetadata copyMetadata_; public: CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, - Coord3D size) + Coord3D size, amd::CopyMetadata copyMetadata = amd::CopyMetadata()) : TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory), srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), - size_(size) { + size_(size), + copyMetadata_(copyMetadata){ // Sanity checks assert(size.c[0] > 0 && "invalid"); } CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList, Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin, - Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect) + Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect, + amd::CopyMetadata copyMetadata = amd::CopyMetadata()) : TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory), srcOrigin_(srcOrigin), dstOrigin_(dstOrigin), size_(size), srcRect_(srcRect), - dstRect_(dstRect) { + dstRect_(dstRect), + copyMetadata_(copyMetadata) { // Sanity checks assert(size.c[0] > 0 && "invalid"); } @@ -875,7 +908,8 @@ class CopyMemoryCommand : public TwoMemoryArgsCommand { const BufferRect& srcRect() const { return srcRect_; } //! Return the destination buffer rectangle information const BufferRect& dstRect() const { return dstRect_; } - + //! Return the copy MetaData + amd::CopyMetadata copyMetadata() const { return copyMetadata_; } //! 
Updates the host memory to read from void setSource(Memory& srcMemory) { memory1_ = &srcMemory; } //! Updates the memory object to write to.