SWDEV-369581 - Convey copy API metadata to ROCclr

Change-Id: I569462d6d268700d419510255e201bf7d80d6714


[ROCm/clr commit: 72b45e2a1f]
Этот коммит содержится в:
Ioannis Assiouras
2022-12-01 15:55:39 +00:00
родитель 4e6a699eae
Коммит 733c8d1d1c
11 изменённых файлов: 535 добавлений и 285 удалений
+18 -11
Просмотреть файл
@@ -32,7 +32,7 @@ HostBlitManager::HostBlitManager(VirtualDevice& vDev, Setup setup)
bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Map the device memory to CPU visible
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
if (NULL == src) {
@@ -52,7 +52,7 @@ bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect,
const amd::BufferRect& hostRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
if (src == NULL) {
@@ -82,7 +82,8 @@ bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
size_t startLayer = origin[2];
size_t numLayers = size[2];
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
@@ -148,7 +149,7 @@ bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
uint flags = 0;
if (entire) {
flags = Memory::CpuWriteOnly;
@@ -173,7 +174,7 @@ bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory
bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Map destination memory
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
if (dst == NULL) {
@@ -203,7 +204,8 @@ bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMe
bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
uint flags = 0;
if (entire) {
flags = Memory::CpuWriteOnly;
@@ -272,7 +274,8 @@ bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_,
// Overlap detection
@@ -302,7 +305,8 @@ bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstM
bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_,
// Overlap detection
@@ -340,7 +344,8 @@ bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory&
bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
bool entire, size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
size_t startLayer = srcOrigin[2];
size_t numLayers = size[2];
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
@@ -405,7 +410,8 @@ bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memor
bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
bool entire, size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
// Map source memory
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
if (src == NULL) {
@@ -469,7 +475,8 @@ bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memor
bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
size_t startLayer = srcOrigin[2];
size_t numLayers = size[2];
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+66 -22
Просмотреть файл
@@ -76,7 +76,9 @@ class BlitManager : public amd::HeapObject {
void* dstHost, //!< Destination host memory
const amd::Coord3D& origin, //!< Source origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const = 0;
//! Copies a buffer object to system memory
@@ -85,7 +87,9 @@ class BlitManager : public amd::HeapObject {
const amd::BufferRect& bufRect, //!< Source rectangle
const amd::BufferRect& hostRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies an image object to system memory
@@ -95,7 +99,9 @@ class BlitManager : public amd::HeapObject {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const = 0;
//! Copies system memory to a buffer object
@@ -103,7 +109,9 @@ class BlitManager : public amd::HeapObject {
Memory& dstMemory, //!< Destination memory object
const amd::Coord3D& origin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies system memory to a buffer object
@@ -112,7 +120,9 @@ class BlitManager : public amd::HeapObject {
const amd::BufferRect& hostRect, //!< Source rectangle
const amd::BufferRect& bufRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies system memory to an image object
@@ -122,7 +132,9 @@ class BlitManager : public amd::HeapObject {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies a buffer object to another buffer object
@@ -131,7 +143,9 @@ class BlitManager : public amd::HeapObject {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies a buffer object to another buffer object
@@ -140,7 +154,9 @@ class BlitManager : public amd::HeapObject {
const amd::BufferRect& srcRect, //!< Source rectangle
const amd::BufferRect& dstRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies an image object to a buffer object
@@ -151,7 +167,9 @@ class BlitManager : public amd::HeapObject {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies a buffer object to an image object
@@ -162,7 +180,9 @@ class BlitManager : public amd::HeapObject {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Copies an image object to another image object
@@ -171,7 +191,9 @@ class BlitManager : public amd::HeapObject {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const = 0;
//! Fills a buffer memory with a pattern data
@@ -248,7 +270,9 @@ class HostBlitManager : public device::BlitManager {
void* dstHost, //!< Destination host memory
const amd::Coord3D& origin, //!< Source origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -257,7 +281,9 @@ class HostBlitManager : public device::BlitManager {
const amd::BufferRect& bufRect, //!< Source rectangle
const amd::BufferRect& hostRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to system memory
@@ -267,7 +293,9 @@ class HostBlitManager : public device::BlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -275,7 +303,9 @@ class HostBlitManager : public device::BlitManager {
device::Memory& dstMemory, //!< Destination memory object
const amd::Coord3D& origin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -284,7 +314,9 @@ class HostBlitManager : public device::BlitManager {
const amd::BufferRect& hostRect, //!< Source rectangle
const amd::BufferRect& bufRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to an image object
@@ -294,7 +326,9 @@ class HostBlitManager : public device::BlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to another buffer object
@@ -303,7 +337,9 @@ class HostBlitManager : public device::BlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to another buffer object
@@ -312,7 +348,9 @@ class HostBlitManager : public device::BlitManager {
const amd::BufferRect& srcRect, //!< Source rectangle
const amd::BufferRect& dstRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -323,7 +361,9 @@ class HostBlitManager : public device::BlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -334,7 +374,9 @@ class HostBlitManager : public device::BlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to another image object
@@ -343,7 +385,9 @@ class HostBlitManager : public device::BlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Fills a buffer memory with a pattern data
+102 -73
Просмотреть файл
@@ -117,12 +117,12 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory**
bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Use host copy if memory has direct access
if (setup_.disableReadBuffer_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
} else {
size_t srcSize = size[0];
size_t offset = 0;
@@ -206,12 +206,14 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
// Use host copy if memory has direct access
if (setup_.disableReadBufferRect_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
copyMetadata);
} else {
Memory& xferBuf = dev().xferRead().acquire();
@@ -259,16 +261,16 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin,
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
gpu().releaseGpuMemoryFence();
if (setup_.disableReadImage_) {
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
} else {
//! @todo Add HW accelerated path
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
}
return true;
@@ -327,14 +329,14 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Use host copy if memory has direct access or it's persistent
if (setup_.disableWriteBuffer_ ||
(gpuMem(dstMemory).isHostMemDirectAccess() &&
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
gpuMem(dstMemory).isPersistentDirectMap()) {
gpu().releaseGpuMemoryFence();
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
} else {
size_t dstSize = size[0];
size_t offset = 0;
@@ -416,14 +418,15 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Use host copy if memory has direct access or it's persistent
if (setup_.disableWriteBufferRect_ ||
(dstMemory.isHostMemDirectAccess() &&
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
gpuMem(dstMemory).isPersistentDirectMap()) {
gpu().releaseGpuMemoryFence();
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
copyMetadata);
} else {
Memory& xferBuf = gpu().xferWrite().Acquire(std::min(gpu().xferWrite().MaxSize(), size[0]));
@@ -473,15 +476,16 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
gpu().releaseGpuMemoryFence();
if (setup_.disableWriteImage_) {
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
} else {
//! @todo Add HW accelerated path
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
}
return true;
@@ -489,12 +493,14 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
if (setup_.disableCopyBuffer_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
!dev().settings().apuSystem_ && gpuMem(dstMemory).isHostMemDirectAccess())) {
gpu().releaseGpuMemoryFence();
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size);
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false,
copyMetadata);
} else {
return gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin, size, gpuMem(dstMemory));
}
@@ -504,12 +510,14 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
if (setup_.disableCopyBufferRect_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
gpuMem(dstMemory).isHostMemDirectAccess())) {
gpu().releaseGpuMemoryFence();
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire);
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire,
copyMetadata);
} else {
size_t srcOffset;
size_t dstOffset;
@@ -580,12 +588,12 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire, size_t rowPitch,
size_t slicePitch) const {
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
bool result = false;
if (setup_.disableCopyImageToBuffer_) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
} else {
// Use PAL path for a transfer
result =
@@ -595,7 +603,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
if (completeOperation_ && !result) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
}
}
@@ -605,12 +613,12 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire, size_t rowPitch,
size_t slicePitch) const {
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
bool result = false;
if (setup_.disableCopyBufferToImage_) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
} else {
// Use PAL path for a transfer
result =
@@ -620,7 +628,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
if (completeOperation_ && !result) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
}
}
@@ -629,16 +637,19 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
bool result = false;
gpu().releaseGpuMemoryFence();
if (setup_.disableCopyImage_) {
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
copyMetadata);
} else {
//! @todo Add HW accelerated path
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
copyMetadata);
}
return result;
@@ -785,7 +796,8 @@ const uint RejectedFormatChannelTotal = sizeof(RejectedOrder) / sizeof(FormatCon
bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
bool entire, size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
static const bool CopyRect = false;
@@ -796,7 +808,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
if (setup_.disableCopyBufferToImage_) {
result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
synchronize();
return result;
}
@@ -808,7 +820,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
// but there are restrictions with older hardware
if (dev().settings().imageDMA_) {
result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
if (result) {
synchronize();
return result;
@@ -899,7 +911,8 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
// Step 2. Initiate compute transfer with all staging buffers
for (uint i = 0; i < MaxXferBuffers; ++i) {
if (copySize > 0) {
if (!copyBufferToImageKernel(*xferBuf[i], dstMemory, xferSrc, dst, xferRect, false)) {
if (!copyBufferToImageKernel(*xferBuf[i], dstMemory, xferSrc, dst, xferRect, false,
0UL, 0UL, copyMetadata)) {
transfer = false;
break;
}
@@ -941,7 +954,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
if (!result) {
result = copyBufferToImageKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
rowPitch, slicePitch);
rowPitch, slicePitch, copyMetadata);
}
synchronize();
@@ -1045,7 +1058,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire,
size_t rowPitch, size_t slicePitch) const {
size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
bool rejected = false;
Memory* dstView = &gpuMem(dstMemory);
bool releaseView = false;
@@ -1084,7 +1098,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
// Fall into the host path if the image format was rejected
if (rejected) {
return HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire);
entire, 0UL, 0UL, copyMetadata);
}
// Use a common blit type with three dimensions by default
@@ -1183,7 +1197,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
bool entire, size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
static const bool CopyRect = false;
@@ -1194,7 +1209,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
if (setup_.disableCopyImageToBuffer_) {
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
synchronize();
return result;
}
@@ -1209,7 +1224,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
if (dev().settings().imageDMA_ &&
gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical) {
result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
if (result) {
synchronize();
return result;
@@ -1280,7 +1295,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
for (uint i = 0; i < MaxXferBuffers; ++i) {
if (copySizeTmp > 0) {
if (!copyImageToBufferKernel(srcMemory, *xferBuf[i], srcTmp, xferDst, xferRectTmp,
false)) {
false, 0UL, 0UL, copyMetadata)) {
transfer = false;
break;
}
@@ -1352,7 +1367,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
if (!result) {
result = copyImageToBufferKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
rowPitch, slicePitch);
rowPitch, slicePitch, copyMetadata);
}
synchronize();
@@ -1365,7 +1380,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire,
size_t rowPitch, size_t slicePitch) const {
size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
bool rejected = false;
Memory* srcView = &gpuMem(srcMemory);
bool releaseView = false;
@@ -1404,7 +1420,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
// Fall into the host path if the image format was rejected
if (rejected) {
return HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire);
entire, 0UL, 0UL, copyMetadata);
}
uint blitType = BlitCopyImageToBuffer;
@@ -1506,7 +1522,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool rejected = false;
Memory* srcView = &gpuMem(srcMemory);
@@ -1553,7 +1570,8 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
// Fall into the host path for the entire 2D copy or
// if the image format was rejected
if (rejected) {
result = HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
result = HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
copyMetadata);
synchronize();
return result;
}
@@ -1663,7 +1681,8 @@ void FindPinSize(size_t& pinSize, const amd::Coord3D& size, size_t& rowPitch, si
bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1673,7 +1692,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
gpu().releaseGpuMemoryFence();
result =
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1686,7 +1706,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
synchronize();
return result;
}
@@ -1699,7 +1719,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
// Copy image to buffer
result = copyImageToBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, rowPitch,
slicePitch);
slicePitch, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -1712,7 +1732,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1722,7 +1743,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
gpu().releaseGpuMemoryFence();
result =
HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1738,7 +1760,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
if (amdMemory == nullptr) {
// Force SW copy
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
synchronize();
return result;
}
@@ -1756,7 +1778,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
// Copy buffer to image
result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch,
slicePitch);
slicePitch, copyMetadata);
if (pinned) {
// Add pinned memory for a later release
@@ -1774,7 +1796,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRectIn,
const amd::BufferRect& dstRectIn, const amd::Coord3D& sizeIn,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
bool rejected = false;
@@ -1784,7 +1806,8 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
gpuMem(dstMemory).isHostMemDirectAccess()) {
if (!dev().settings().disableSdma_) {
result =
DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire);
DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire,
copyMetadata);
}
if (result) {
synchronize();
@@ -1888,7 +1911,7 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1897,7 +1920,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
synchronize();
return result;
} else {
@@ -1909,7 +1932,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
synchronize();
return result;
}
@@ -1921,12 +1944,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
Memory* dstMemory = dev().getGpuMemory(amdMemory);
// Copy buffer to buffer
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire);
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
} else {
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
}
}
@@ -1938,7 +1961,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect,
const amd::BufferRect& hostRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1947,7 +1970,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1957,7 +1981,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
copyMetadata);
synchronize();
return result;
}
@@ -1973,7 +1998,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
Memory* dstMemory = dev().getGpuMemory(amdMemory);
// Copy buffer rect
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire);
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire,
copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -1986,7 +2012,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1997,7 +2023,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
(gpuMem(dstMemory).memoryType() == Resource::Persistent)) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
synchronize();
return result;
} else {
@@ -2010,7 +2036,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
if (amdMemory == NULL) {
// Force SW copy
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
synchronize();
return result;
}
@@ -2022,12 +2048,12 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
Memory* srcMemory = dev().getGpuMemory(amdMemory);
// Copy buffer
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire);
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
} else {
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
}
}
@@ -2039,7 +2065,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -2050,7 +2076,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
gpuMem(dstMemory).isPersistentDirectMap()) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -2061,7 +2088,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
if (amdMemory == NULL) {
// Force SW copy
result =
HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
copyMetadata);
synchronize();
return result;
}
@@ -2080,7 +2108,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
rect.end_ = hostRect.end_;
// Copy buffer rect
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire);
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire,
copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -2172,7 +2201,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& sizeIn, bool entire) const {
const amd::Coord3D& sizeIn, bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -2246,7 +2275,7 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
address parameters = kernels_[blitType]->parameters().values();
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters);
} else {
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire, copyMetadata);
}
synchronize();
+72 -24
Просмотреть файл
@@ -57,7 +57,9 @@ class DmaBlitManager : public device::HostBlitManager {
void* dstHost, //!< Destination host memory
const amd::Coord3D& origin, //!< Source origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -66,7 +68,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::BufferRect& bufRect, //!< Source rectangle
const amd::BufferRect& hostRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to system memory
@@ -76,7 +80,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -84,7 +90,9 @@ class DmaBlitManager : public device::HostBlitManager {
device::Memory& dstMemory, //!< Destination memory object
const amd::Coord3D& origin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -93,7 +101,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::BufferRect& hostRect, //!< Source rectangle
const amd::BufferRect& bufRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to an image object
@@ -103,7 +113,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to another buffer object
@@ -112,7 +124,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to another buffer object
@@ -121,7 +135,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::BufferRect& srcRect, //!< Source rectangle
const amd::BufferRect& dstRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -132,7 +148,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -143,7 +161,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to another image object
@@ -152,7 +172,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Stream memory write operation - Write a 'value' at 'memory'.
@@ -269,7 +291,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::BufferRect& srcRectIn, //!< Source rectangle
const amd::BufferRect& dstRectIn, //!< Destination rectangle
const amd::Coord3D& sizeIn, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -277,7 +301,9 @@ class KernelBlitManager : public DmaBlitManager {
void* dstHost, //!< Destination host memory
const amd::Coord3D& origin, //!< Source origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -286,7 +312,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::BufferRect& bufRect, //!< Source rectangle
const amd::BufferRect& hostRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -294,7 +322,9 @@ class KernelBlitManager : public DmaBlitManager {
device::Memory& dstMemory, //!< Destination memory object
const amd::Coord3D& origin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -303,7 +333,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::BufferRect& hostRect, //!< Source rectangle
const amd::BufferRect& bufRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -312,7 +344,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -323,7 +357,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -334,7 +370,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to another image object
@@ -343,7 +381,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to system memory
@@ -353,7 +393,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to an image object
@@ -363,7 +405,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Fills a buffer memory with a pattern data
@@ -433,7 +477,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -444,7 +490,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Creates a program for all blit operations
+27 -22
Просмотреть файл
@@ -1200,7 +1200,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
// Accelerated transfer without pinning
amd::Coord3D dstOrigin(offset);
result = blitMgr().copyBuffer(*memory, *hostMemory, origin, dstOrigin, size,
vcmd.isEntireMemory());
vcmd.isEntireMemory(), vcmd.copyMetadata());
} else {
// The logic below will perform 2 step copy to make sure memory pinning doesn't
// occur on the first unaligned page, because in Windows memory manager can
@@ -1218,11 +1218,11 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
}
// Make first step transfer
if (partial > 0) {
result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial);
result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial, false, vcmd.copyMetadata());
}
// Second step transfer if something left to copy
if (partial < size[0]) {
result &= blitMgr().readBuffer(*memory, tmpHost, origin[0] + partial, size[0] - partial);
result &= blitMgr().readBuffer(*memory, tmpHost, origin[0] + partial, size[0] - partial, false, vcmd.copyMetadata());
}
}
if (nullptr != bufferFromImage) {
@@ -1237,10 +1237,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
vcmd.hostRect().slicePitch_);
if (hostMemory != nullptr) {
result = blitMgr().copyBufferRect(*memory, *hostMemory, vcmd.bufRect(), hostbufferRect,
vcmd.size(), vcmd.isEntireMemory());
vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
} else {
result = blitMgr().readBufferRect(*memory, vcmd.destination(), vcmd.bufRect(),
vcmd.hostRect(), vcmd.size(), vcmd.isEntireMemory());
vcmd.hostRect(), vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
}
} break;
case CL_COMMAND_READ_IMAGE:
@@ -1255,7 +1255,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
// Copy memory from the original image buffer into the backing store image
result = blitMgr().copyBufferToImage(*buffer, *imageBuffer->CopyImageBuffer(), offs,
offs, image->getRegion(), true,
image->getRowPitch(), image->getSlicePitch());
image->getRowPitch(), image->getSlicePitch(), vcmd.copyMetadata());
}
}
if (hostMemory != nullptr) {
@@ -1263,10 +1263,12 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
amd::Coord3D dstOrigin(offset);
result =
blitMgr().copyImageToBuffer(*memory, *hostMemory, vcmd.origin(), dstOrigin, vcmd.size(),
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch());
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch(),
vcmd.copyMetadata());
} else {
result = blitMgr().readImage(*memory, vcmd.destination(), vcmd.origin(), vcmd.size(),
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory());
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory(),
vcmd.copyMetadata());
}
break;
default:
@@ -1331,7 +1333,7 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
// Accelerated transfer without pinning
amd::Coord3D srcOrigin(offset);
result = blitMgr().copyBuffer(*hostMemory, *memory, srcOrigin, origin, size,
vcmd.isEntireMemory());
vcmd.isEntireMemory(), vcmd.copyMetadata());
} else {
// The logic below will perform 2 step copy to make sure memory pinning doesn't
// occur on the first unaligned page, because in Windows memory manager can
@@ -1349,11 +1351,11 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
}
// Make first step transfer
if (partial > 0) {
result = blitMgr().writeBuffer(vcmd.source(), *memory, origin, partial);
result = blitMgr().writeBuffer(vcmd.source(), *memory, origin, partial, false, vcmd.copyMetadata());
}
// Second step transfer if something left to copy
if (partial < size[0]) {
result &= blitMgr().writeBuffer(tmpHost, *memory, origin[0] + partial, size[0] - partial);
result &= blitMgr().writeBuffer(tmpHost, *memory, origin[0] + partial, size[0] - partial, false, vcmd.copyMetadata());
}
}
if (nullptr != bufferFromImage) {
@@ -1368,10 +1370,10 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
vcmd.hostRect().slicePitch_);
if (hostMemory != nullptr) {
result = blitMgr().copyBufferRect(*hostMemory, *memory, hostbufferRect, vcmd.bufRect(),
vcmd.size(), vcmd.isEntireMemory());
vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
} else {
result = blitMgr().writeBufferRect(vcmd.source(), *memory, vcmd.hostRect(), vcmd.bufRect(),
vcmd.size(), vcmd.isEntireMemory());
vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
}
} break;
case CL_COMMAND_WRITE_IMAGE:
@@ -1380,10 +1382,12 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
amd::Coord3D srcOrigin(offset);
result =
blitMgr().copyBufferToImage(*hostMemory, *memory, srcOrigin, vcmd.origin(), vcmd.size(),
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch());
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch(),
vcmd.copyMetadata());
} else {
result = blitMgr().writeImage(vcmd.source(), *memory, vcmd.origin(), vcmd.size(),
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory());
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory(),
vcmd.copyMetadata());
}
break;
default:
@@ -1404,7 +1408,8 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memory& dstMem,
bool entire, const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect) {
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
amd::CopyMetadata copyMetadata) {
// Translate memory references and ensure cache up-to-date
pal::Memory* dstMemory = dev().getGpuMemory(&dstMem);
pal::Memory* srcMemory = dev().getGpuMemory(&srcMem);
@@ -1464,7 +1469,7 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo
}
result = blitMgr().copyBuffer(*srcMemory, *dstMemory, realSrcOrigin, realDstOrigin, realSize,
entire);
entire, copyMetadata);
if (nullptr != bufferFromImageSrc) {
bufferFromImageSrc->release();
@@ -1474,18 +1479,18 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo
}
} break;
case CL_COMMAND_COPY_BUFFER_RECT:
result = blitMgr().copyBufferRect(*srcMemory, *dstMemory, srcRect, dstRect, size, entire);
result = blitMgr().copyBufferRect(*srcMemory, *dstMemory, srcRect, dstRect, size, entire, copyMetadata);
break;
case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
result =
blitMgr().copyImageToBuffer(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire);
blitMgr().copyImageToBuffer(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, 0UL, 0UL, copyMetadata);
break;
case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
result =
blitMgr().copyBufferToImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire);
blitMgr().copyBufferToImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, 0UL, 0UL, copyMetadata);
break;
case CL_COMMAND_COPY_IMAGE:
result = blitMgr().copyImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire);
result = blitMgr().copyImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, copyMetadata);
break;
default:
LogError("Unsupported command type for memory copy!");
@@ -1512,7 +1517,7 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand& vcmd) {
bool entire = vcmd.isEntireMemory();
if (!copyMemory(type, vcmd.source(), vcmd.destination(), entire, vcmd.srcOrigin(),
vcmd.dstOrigin(), vcmd.size(), vcmd.srcRect(), vcmd.dstRect())) {
vcmd.dstOrigin(), vcmd.size(), vcmd.srcRect(), vcmd.dstRect(), vcmd.copyMetadata())) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
+3 -1
Просмотреть файл
@@ -630,7 +630,9 @@ class VirtualGPU : public device::VirtualDevice {
const amd::Coord3D& dstOrigin, //!< destination origin
const amd::Coord3D& size, //!< copy size
const amd::BufferRect& srcRect, //!< region of source for copy
const amd::BufferRect& dstRect //!< region of destination for copy
const amd::BufferRect& dstRect, //!< region of destination for copy
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
);
void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel
+95 -71
Просмотреть файл
@@ -61,7 +61,7 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory&
bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation
gpu().releaseGpuMemoryFence(kSkipCpuWait);
@@ -70,7 +70,7 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().Barriers().WaitCurrent();
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
} else {
size_t srcSize = size[0];
size_t offset = 0;
@@ -150,7 +150,8 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation
gpu().releaseGpuMemoryFence();
@@ -159,7 +160,7 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().Barriers().WaitCurrent();
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, copyMetadata);
} else {
Memory& xferBuf = dev().xferRead().acquire();
address staging = xferBuf.getDeviceMemory();
@@ -190,17 +191,17 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin,
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation
gpu().releaseGpuMemoryFence();
if (setup_.disableReadImage_) {
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
} else {
//! @todo Add HW accelerated path
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
}
return true;
@@ -222,13 +223,13 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// Use host copy if memory has direct access
if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
} else {
// HSA copy functionality with a possible async operation
gpu().releaseGpuMemoryFence(kSkipCpuWait);
@@ -314,14 +315,14 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation
gpu().releaseGpuMemoryFence();
// Use host copy if memory has direct access
if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) {
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, copyMetadata);
} else {
Memory& xferBuf = dev().xferWrite().acquire();
address staging = xferBuf.getDeviceMemory();
@@ -352,17 +353,18 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation
gpu().releaseGpuMemoryFence();
if (setup_.disableWriteImage_) {
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
} else {
//! @todo Add HW accelerated path
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
}
return true;
@@ -370,13 +372,13 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire, amd::CopyMetadata copyMetadata) const {
if (setup_.disableCopyBuffer_ ||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
(dev().agent_profile() != HSA_PROFILE_FULL) && dstMemory.isHostMemDirectAccess())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size);
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false, copyMetadata);
} else {
return hsaCopy(gpuMem(srcMemory), gpuMem(dstMemory), srcOrigin, dstOrigin, size);
}
@@ -387,13 +389,14 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
// ================================================================================================
bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
if (setup_.disableCopyBufferRect_ ||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
dstMemory.isHostMemDirectAccess())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire);
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire, copyMetadata);
} else {
gpu().releaseGpuMemoryFence(kSkipCpuWait);
@@ -500,7 +503,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire, size_t rowPitch,
size_t slicePitch) const {
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation, hence make sure GPU is done
gpu().releaseGpuMemoryFence();
@@ -508,7 +511,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
if (setup_.disableCopyImageToBuffer_) {
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
} else {
Image& srcImage = static_cast<roc::Image&>(srcMemory);
Buffer& dstBuffer = static_cast<roc::Buffer&>(dstMemory);
@@ -534,7 +537,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
// Check if a HostBlit transfer is required
if (completeOperation_ && !result) {
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
}
}
@@ -544,7 +547,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire, size_t rowPitch,
size_t slicePitch) const {
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation, hence make sure GPU is done
gpu().releaseGpuMemoryFence();
@@ -552,7 +555,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
if (setup_.disableCopyBufferToImage_) {
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
} else {
Buffer& srcBuffer = static_cast<roc::Buffer&>(srcMemory);
Image& dstImage = static_cast<roc::Image&>(dstMemory);
@@ -579,7 +582,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
// Check if a HostBlit transfer is required
if (completeOperation_ && !result) {
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
}
}
@@ -588,17 +591,20 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
// HSA copy functionality with a possible async operation, hence make sure GPU is done
gpu().releaseGpuMemoryFence();
bool result = false;
if (setup_.disableCopyImage_) {
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
copyMetadata);
} else {
//! @todo Add HW accelerated path
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
copyMetadata);
}
return result;
@@ -878,7 +884,8 @@ const uint RejectedFormatChannelTotal = sizeof(RejectedOrder) / sizeof(FormatCon
bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
bool entire, size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -890,7 +897,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
if (setup_.disableCopyBufferToImage_) {
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
synchronize();
return result;
}
@@ -902,7 +909,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
// but there are restrictions with older hardware
if (dev().settings().imageDMA_) {
result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
if (result) {
synchronize();
return result;
@@ -912,7 +919,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
if (!result) {
result = copyBufferToImageKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
rowPitch, slicePitch);
rowPitch, slicePitch, copyMetadata);
}
synchronize();
@@ -949,7 +956,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire,
size_t rowPitch, size_t slicePitch) const {
size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -995,7 +1003,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
// Fall into the host path if the image format was rejected
if (rejected) {
return DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
}
// Use a common blit type with three dimensions by default
@@ -1096,7 +1104,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
bool entire, size_t rowPitch, size_t slicePitch) const {
bool entire, size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -1108,7 +1117,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
if (setup_.disableCopyImageToBuffer_) {
result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
synchronize();
return result;
}
@@ -1122,7 +1131,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
// single step SDMA is causing corruption and the cause is under investigation
if (dev().settings().imageDMA_) {
result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
if (result) {
synchronize();
return result;
@@ -1132,7 +1141,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
if (!result) {
result = copyImageToBufferKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
rowPitch, slicePitch);
rowPitch, slicePitch, copyMetadata);
}
synchronize();
@@ -1145,7 +1154,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire,
size_t rowPitch, size_t slicePitch) const {
size_t rowPitch, size_t slicePitch,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -1190,7 +1200,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
// Fall into the host path if the image format was rejected
if (rejected) {
return DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
entire, rowPitch, slicePitch);
entire, rowPitch, slicePitch, copyMetadata);
}
uint blitType = BlitCopyImageToBuffer;
@@ -1296,7 +1306,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -1348,7 +1359,8 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
// Fall into the host path for the entire 2D copy or
// if the image format was rejected
if (rejected) {
result = DmaBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
result = DmaBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
copyMetadata);
synchronize();
return result;
}
@@ -1460,7 +1472,8 @@ void FindPinSize(size_t& pinSize, const amd::Coord3D& size, size_t& rowPitch, si
bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -1471,7 +1484,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
if (setup_.disableReadImage_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1484,7 +1498,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
if (amdMemory == nullptr) {
// Force SW copy
result =
DmaBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
DmaBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire,
copyMetadata);
synchronize();
return result;
}
@@ -1497,7 +1512,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
// Copy image to buffer
result = copyImageToBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, rowPitch,
slicePitch);
slicePitch, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -1510,7 +1525,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
size_t rowPitch, size_t slicePitch, bool entire,
amd::CopyMetadata copyMetadata) const {
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
@@ -1521,7 +1537,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
if (setup_.disableWriteImage_ || dstMemory.isHostMemDirectAccess()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1534,7 +1551,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
if (amdMemory == nullptr) {
// Force SW copy
result = DmaBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
entire, copyMetadata);
synchronize();
return result;
}
@@ -1547,7 +1564,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
// Copy buffer to image
result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch,
slicePitch);
slicePitch, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -1561,7 +1578,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRectIn,
const amd::BufferRect& dstRectIn, const amd::Coord3D& sizeIn,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
bool rejected = false;
@@ -1569,7 +1586,8 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
// Fall into the ROC path for rejected transfers
if (dev().info().pcie_atomics_ && (setup_.disableCopyBufferRect_ ||
srcMemory.isHostMemDirectAccess() || dstMemory.isHostMemDirectAccess())) {
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire);
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire,
copyMetadata);
if (result) {
synchronize();
@@ -1686,7 +1704,7 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
// ================================================================================================
bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1716,7 +1734,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
if (setup_.disableReadBuffer_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
synchronize();
return result;
} else {
@@ -1728,7 +1746,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
if (amdMemory == nullptr) {
// Force SW copy
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
synchronize();
return result;
}
@@ -1740,12 +1758,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
Memory* dstMemory = dev().getRocMemory(amdMemory);
// Copy buffer to buffer
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire);
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
} else {
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
}
}
@@ -1758,7 +1776,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect,
const amd::BufferRect& hostRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1767,7 +1785,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1777,7 +1796,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
if (amdMemory == nullptr) {
// Force SW copy
result = DmaBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
result = DmaBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
copyMetadata);
synchronize();
return result;
}
@@ -1793,7 +1813,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
Memory* dstMemory = dev().getRocMemory(amdMemory);
// Copy buffer rect
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire);
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -1807,7 +1827,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
// ================================================================================================
bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1832,7 +1852,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
gpuMem(dstMemory).IsPersistentDirectMap()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
synchronize();
return result;
} else {
@@ -1845,7 +1865,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
if (amdMemory == nullptr) {
// Force SW copy
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
synchronize();
return result;
}
@@ -1857,12 +1877,12 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
Memory* srcMemory = dev().getRocMemory(amdMemory);
// Copy buffer to buffer
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire);
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
} else {
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
}
}
@@ -1875,7 +1895,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
bool entire, amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
@@ -1884,7 +1904,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
gpuMem(dstMemory).IsPersistentDirectMap()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
copyMetadata);
synchronize();
return result;
} else {
@@ -1894,7 +1915,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
if (amdMemory == nullptr) {
// Force DMA copy with staging
result = DmaBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
result = DmaBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
copyMetadata);
synchronize();
return result;
}
@@ -1913,7 +1935,7 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
rect.end_ = hostRect.end_;
// Copy buffer rect
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire);
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire, copyMetadata);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
@@ -2151,7 +2173,8 @@ bool KernelBlitManager::fillBuffer3D(device::Memory& memory, const void* pattern
// ================================================================================================
bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& sizeIn, bool entire) const {
const amd::Coord3D& sizeIn, bool entire,
amd::CopyMetadata copyMetadata) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
bool p2p = (&gpuMem(srcMemory).dev() != &gpuMem(dstMemory).dev()) &&
@@ -2247,7 +2270,8 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
gpu().SetCopyCommandType(CL_COMMAND_READ_BUFFER);
}
}
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire,
copyMetadata);
}
synchronize();
+72 -24
Просмотреть файл
@@ -59,7 +59,9 @@ class DmaBlitManager : public device::HostBlitManager {
void* dstHost, //!< Destination host memory
const amd::Coord3D& origin, //!< Source origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -68,7 +70,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::BufferRect& bufRect, //!< Source rectangle
const amd::BufferRect& hostRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to system memory
@@ -78,7 +82,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -86,7 +92,9 @@ class DmaBlitManager : public device::HostBlitManager {
device::Memory& dstMemory, //!< Destination memory object
const amd::Coord3D& origin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -95,7 +103,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::BufferRect& hostRect, //!< Source rectangle
const amd::BufferRect& bufRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to an image object
@@ -105,7 +115,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to another buffer object
@@ -114,7 +126,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to another buffer object
@@ -123,7 +137,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::BufferRect& srcRect, //!< Source rectangle
const amd::BufferRect& dstRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -134,7 +150,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -145,7 +163,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to another image object
@@ -154,7 +174,9 @@ class DmaBlitManager : public device::HostBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Stream memory write operation - Write a 'value' at 'memory'.
@@ -290,7 +312,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::BufferRect& srcRectIn, //!< Source rectangle
const amd::BufferRect& dstRectIn, //!< Destination rectangle
const amd::Coord3D& sizeIn, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -298,7 +322,9 @@ class KernelBlitManager : public DmaBlitManager {
void* dstHost, //!< Destination host memory
const amd::Coord3D& origin, //!< Source origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies a buffer object to system memory
@@ -307,7 +333,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::BufferRect& bufRect, //!< Source rectangle
const amd::BufferRect& hostRect, //!< Destination rectangle
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -315,7 +343,9 @@ class KernelBlitManager : public DmaBlitManager {
device::Memory& dstMemory, //!< Destination memory object
const amd::Coord3D& origin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to a buffer object
@@ -324,7 +354,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::BufferRect& hostRect, //!< Source rectangle (host memory layout)
const amd::BufferRect& bufRect, //!< Destination rectangle (buffer object layout)
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -333,7 +365,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies a buffer object to an image object
@@ -344,7 +378,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -355,7 +391,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to another image object
@@ -364,7 +402,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& srcOrigin, //!< Source origin
const amd::Coord3D& dstOrigin, //!< Destination origin
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to system memory
@@ -374,7 +414,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Copies system memory to an image object
@@ -384,7 +426,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
size_t rowPitch, //!< Row pitch for host memory
size_t slicePitch, //!< Slice pitch for host memory
bool entire = false //!< Entire buffer will be updated
bool entire = false, //!< Entire buffer will be updated
amd::CopyMetadata copyMetadata =
amd::CopyMetadata()//!< Memory copy MetaData
) const;
//! Fills a buffer memory with a pattern data
@@ -481,7 +525,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Copies an image object to a buffer object
@@ -492,7 +538,9 @@ class KernelBlitManager : public DmaBlitManager {
const amd::Coord3D& size, //!< Size of the copy region
bool entire = false, //!< Entire buffer will be updated
size_t rowPitch = 0, //!< Pitch for buffer
size_t slicePitch = 0 //!< Slice for buffer
size_t slicePitch = 0, //!< Slice for buffer
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
) const;
//! Creates a program for all blit operations
+26 -19
Просмотреть файл
@@ -1534,9 +1534,9 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) {
// Accelerated transfer without pinning
amd::Coord3D dstOrigin(offset);
result = blitMgr().copyBuffer(*devMem, *hostMemory, origin, dstOrigin, size,
cmd.isEntireMemory());
cmd.isEntireMemory(), cmd.copyMetadata());
} else {
result = blitMgr().readBuffer(*devMem, dst, origin, size, cmd.isEntireMemory());
result = blitMgr().readBuffer(*devMem, dst, origin, size, cmd.isEntireMemory(), cmd.copyMetadata());
}
break;
}
@@ -1548,10 +1548,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) {
cmd.hostRect().slicePitch_);
if (hostMemory != nullptr) {
result = blitMgr().copyBufferRect(*devMem, *hostMemory, cmd.bufRect(), hostbufferRect,
size, cmd.isEntireMemory());
size, cmd.isEntireMemory(), cmd.copyMetadata());
} else {
result = blitMgr().readBufferRect(*devMem, dst, cmd.bufRect(), cmd.hostRect(), size,
cmd.isEntireMemory());
cmd.isEntireMemory(), cmd.copyMetadata());
}
break;
}
@@ -1576,10 +1576,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) {
amd::Coord3D dstOrigin(offset);
result =
blitMgr().copyImageToBuffer(*devMem, *hostMemory, cmd.origin(), dstOrigin, size,
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch());
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch(), cmd.copyMetadata());
} else {
result = blitMgr().readImage(*devMem, dst, cmd.origin(), size, cmd.rowPitch(),
cmd.slicePitch(), cmd.isEntireMemory());
cmd.slicePitch(), cmd.isEntireMemory(), cmd.copyMetadata());
}
break;
}
@@ -1641,9 +1641,9 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) {
// Accelerated transfer without pinning
amd::Coord3D srcOrigin(offset);
result = blitMgr().copyBuffer(*hostMemory, *devMem, srcOrigin, origin, size,
cmd.isEntireMemory());
cmd.isEntireMemory(), cmd.copyMetadata());
} else {
result = blitMgr().writeBuffer(src, *devMem, origin, size, cmd.isEntireMemory());
result = blitMgr().writeBuffer(src, *devMem, origin, size, cmd.isEntireMemory(), cmd.copyMetadata());
}
break;
}
@@ -1655,10 +1655,10 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) {
cmd.hostRect().slicePitch_);
if (hostMemory != nullptr) {
result = blitMgr().copyBufferRect(*hostMemory, *devMem, hostbufferRect, cmd.bufRect(),
size, cmd.isEntireMemory());
size, cmd.isEntireMemory(), cmd.copyMetadata());
} else {
result = blitMgr().writeBufferRect(src, *devMem, cmd.hostRect(), cmd.bufRect(), size,
cmd.isEntireMemory());
cmd.isEntireMemory(), cmd.copyMetadata());
}
break;
}
@@ -1668,10 +1668,11 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) {
amd::Coord3D srcOrigin(offset);
result =
blitMgr().copyBufferToImage(*hostMemory, *devMem, srcOrigin, cmd.origin(), size,
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch());
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch(),
cmd.copyMetadata());
} else {
result = blitMgr().writeImage(src, *devMem, cmd.origin(), size, cmd.rowPitch(),
cmd.slicePitch(), cmd.isEntireMemory());
cmd.slicePitch(), cmd.isEntireMemory(), cmd.copyMetadata());
}
break;
}
@@ -1752,7 +1753,8 @@ void VirtualGPU::submitSvmPrefetchAsync(amd::SvmPrefetchAsyncCommand& cmd) {
bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memory& dstMem,
bool entire, const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect) {
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
amd::CopyMetadata copyMetadata) {
Memory* srcDevMem = dev().getRocMemory(&srcMem);
Memory* dstDevMem = dev().getRocMemory(&dstMem);
@@ -1796,23 +1798,28 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo
realSize.c[0] *= elemSize;
}
result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, realSrcOrigin, realDstOrigin, realSize, entire);
result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, realSrcOrigin, realDstOrigin,
realSize, entire, copyMetadata);
break;
}
case CL_COMMAND_COPY_BUFFER_RECT: {
result = blitMgr().copyBufferRect(*srcDevMem, *dstDevMem, srcRect, dstRect, size, entire);
result = blitMgr().copyBufferRect(*srcDevMem, *dstDevMem, srcRect, dstRect, size, entire,
copyMetadata);
break;
}
case CL_COMMAND_COPY_IMAGE: {
result = blitMgr().copyImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire);
result = blitMgr().copyImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire,
copyMetadata);
break;
}
case CL_COMMAND_COPY_IMAGE_TO_BUFFER: {
result = blitMgr().copyImageToBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire);
result = blitMgr().copyImageToBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire,
0UL, 0UL, copyMetadata);
break;
}
case CL_COMMAND_COPY_BUFFER_TO_IMAGE: {
result = blitMgr().copyBufferToImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire);
result = blitMgr().copyBufferToImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire,
0UL, 0UL, copyMetadata);
break;
}
default:
@@ -1841,7 +1848,7 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand& cmd) {
bool entire = cmd.isEntireMemory();
if (!copyMemory(type, cmd.source(), cmd.destination(), entire, cmd.srcOrigin(),
cmd.dstOrigin(), cmd.size(), cmd.srcRect(), cmd.dstRect())) {
cmd.dstOrigin(), cmd.size(), cmd.srcRect(), cmd.dstRect(), cmd.copyMetadata())) {
cmd.setStatus(CL_INVALID_OPERATION);
}
+3 -1
Просмотреть файл
@@ -470,7 +470,9 @@ class VirtualGPU : public device::VirtualDevice {
const amd::Coord3D& dstOrigin, //!< destination memory object
const amd::Coord3D& size, //!< copy size
const amd::BufferRect& srcRect, //!< region of source for copy
const amd::BufferRect& dstRect //!< region of destination for copy
const amd::BufferRect& dstRect, //!< region of destination for copy
amd::CopyMetadata copyMetadata =
amd::CopyMetadata() //!< Memory copy MetaData
);
//! Updates AQL header for the upcoming dispatch
+51 -17
Просмотреть файл
@@ -235,6 +235,25 @@ class Event : public RuntimeObject {
//! Stores the given scope value in event_scope_
void setEventScope(int32_t scope) { event_scope_ = scope; }
};
//! Copy metadata attached to read/write/copy memory commands and forwarded
//! to the blit managers.
//!
//! The bit-fields share storage with flags_, so the whole state can be
//! inspected as a single 32-bit word. Reading flags_ after writing the
//! bit-fields relies on the compiler's union type-punning support.
union CopyMetadata {
  //! Requested copy engine; NONE (0) matches the default-constructed state
  enum CopyEnginePreference {
    NONE = 0,
    BLIT = 1,
    SDMA = 2,
    CPDMA = 3
  };
  struct {
    uint32_t isAsync_ : 1;               //!< Asynchronous-copy flag
    uint32_t copyEnginePreference_ : 2;  //!< Holds a CopyEnginePreference value
  };
  uint32_t flags_;  //!< All metadata bits viewed as one word

  //! Default metadata: every bit cleared
  CopyMetadata() : flags_(0) {}

  //! Builds metadata from explicit values.
  //! flags_ is zeroed first so the bits not covered by the two bit-fields
  //! have a defined value; initialising only the bit-fields would leave the
  //! remaining 29 bits of the word indeterminate.
  CopyMetadata(bool isAsync, CopyEnginePreference copyEnginePreference) : flags_(0) {
    isAsync_ = isAsync;
    copyEnginePreference_ = copyEnginePreference;
  }
};
/*! \brief An operation that is submitted to a command queue.
*
* %Command is the abstract base type of all OpenCL operations
@@ -463,18 +482,20 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
BufferRect bufRect_; //!< Buffer rectangle information
BufferRect hostRect_; //!< Host memory rectangle information
amd::CopyMetadata copyMetadata_;
public:
//! Construct a new ReadMemoryCommand
ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
Memory& memory, Coord3D origin, Coord3D size, void* hostPtr,
size_t rowPitch = 0, size_t slicePitch = 0)
size_t rowPitch = 0, size_t slicePitch = 0,
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
origin_(origin),
size_(size),
hostPtr_(hostPtr),
rowPitch_(rowPitch),
slicePitch_(slicePitch) {
slicePitch_(slicePitch),
copyMetadata_(copyMetadata) {
// Sanity checks
assert(hostPtr != NULL && "hostPtr cannot be null");
assert(size.c[0] > 0 && "invalid");
@@ -483,7 +504,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
//! Construct a new ReadMemoryCommand
ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
Memory& memory, Coord3D origin, Coord3D size, void* hostPtr,
const BufferRect& bufRect, const BufferRect& hostRect)
const BufferRect& bufRect, const BufferRect& hostRect,
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
origin_(origin),
size_(size),
@@ -491,7 +513,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
rowPitch_(0),
slicePitch_(0),
bufRect_(bufRect),
hostRect_(hostRect) {
hostRect_(hostRect),
copyMetadata_(copyMetadata) {
// Sanity checks
assert(hostPtr != NULL && "hostPtr cannot be null");
assert(size.c[0] > 0 && "invalid");
@@ -517,7 +540,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
const BufferRect& bufRect() const { return bufRect_; }
//! Return the host rectangle information
const BufferRect& hostRect() const { return hostRect_; }
//! Return, by value, the copy metadata this read command was constructed with
amd::CopyMetadata copyMetadata() const { return copyMetadata_; }
//! Updates the memory object to read from (stores its address in memory_)
void setSource(Memory& memory) { memory_ = &memory; }
//! Updates the host memory to write to
@@ -580,17 +604,20 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
BufferRect bufRect_; //!< Buffer rectangle information
BufferRect hostRect_; //!< Host memory rectangle information
amd::CopyMetadata copyMetadata_;
public:
WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr,
size_t rowPitch = 0, size_t slicePitch = 0)
size_t rowPitch = 0, size_t slicePitch = 0,
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
origin_(origin),
size_(size),
hostPtr_(hostPtr),
rowPitch_(rowPitch),
slicePitch_(slicePitch) {
slicePitch_(slicePitch),
copyMetadata_(copyMetadata){
// Sanity checks
assert(hostPtr != NULL && "hostPtr cannot be null");
assert(size.c[0] > 0 && "invalid");
@@ -598,7 +625,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr,
const BufferRect& bufRect, const BufferRect& hostRect)
const BufferRect& bufRect, const BufferRect& hostRect,
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
origin_(origin),
size_(size),
@@ -606,7 +634,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
rowPitch_(0),
slicePitch_(0),
bufRect_(bufRect),
hostRect_(hostRect) {
hostRect_(hostRect),
copyMetadata_(copyMetadata){
// Sanity checks
assert(hostPtr != NULL && "hostPtr cannot be null");
assert(size.c[0] > 0 && "invalid");
@@ -632,7 +661,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
const BufferRect& bufRect() const { return bufRect_; }
//! Return the host rectangle information
const BufferRect& hostRect() const { return hostRect_; }
//! Return, by value, the copy metadata this write command was constructed with
amd::CopyMetadata copyMetadata() const { return copyMetadata_; }
//! Updates the host memory to read from
void setSource(const void* hostPtr) { hostPtr_ = hostPtr; }
//! Updates the host memory to write to
@@ -831,28 +861,31 @@ class CopyMemoryCommand : public TwoMemoryArgsCommand {
BufferRect srcRect_; //!< Source buffer rectangle information
BufferRect dstRect_; //!< Destination buffer rectangle information
amd::CopyMetadata copyMetadata_;
public:
//! Construct a CopyMemoryCommand described by source/destination origins and a size.
//! This overload does not initialise srcRect_/dstRect_ explicitly.
//! copyMetadata is optional (default-constructed when omitted) and is stored
//! in copyMetadata_.
CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin,
Coord3D size)
Coord3D size, amd::CopyMetadata copyMetadata = amd::CopyMetadata())
: TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory),
srcOrigin_(srcOrigin),
dstOrigin_(dstOrigin),
size_(size) {
size_(size),
copyMetadata_(copyMetadata){
// Sanity checks: the first size component must be non-zero
assert(size.c[0] > 0 && "invalid");
}
//! Construct a CopyMemoryCommand that also carries source and destination
//! buffer rectangles (stored in srcRect_/dstRect_).
//! copyMetadata is optional (default-constructed when omitted) and is stored
//! in copyMetadata_.
CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin,
Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect)
Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect,
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
: TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory),
srcOrigin_(srcOrigin),
dstOrigin_(dstOrigin),
size_(size),
srcRect_(srcRect),
dstRect_(dstRect) {
dstRect_(dstRect),
copyMetadata_(copyMetadata) {
// Sanity checks: the first size component must be non-zero
assert(size.c[0] > 0 && "invalid");
}
@@ -875,7 +908,8 @@ class CopyMemoryCommand : public TwoMemoryArgsCommand {
const BufferRect& srcRect() const { return srcRect_; }
//! Return the destination buffer rectangle information
const BufferRect& dstRect() const { return dstRect_; }
//! Return, by value, the copy metadata this copy command was constructed with
amd::CopyMetadata copyMetadata() const { return copyMetadata_; }
//! Updates the source memory object to read from (stores its address in memory1_)
void setSource(Memory& srcMemory) { memory1_ = &srcMemory; }
//! Updates the memory object to write to.