SWDEV-369581 - Convey copy API metadata to ROCclr
Change-Id: I569462d6d268700d419510255e201bf7d80d6714
[ROCm/clr commit: 72b45e2a1f]
Этот коммит содержится в:
@@ -32,7 +32,7 @@ HostBlitManager::HostBlitManager(VirtualDevice& vDev, Setup setup)
|
||||
|
||||
bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Map the device memory to CPU visible
|
||||
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
|
||||
if (NULL == src) {
|
||||
@@ -52,7 +52,7 @@ bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::BufferRect& bufRect,
|
||||
const amd::BufferRect& hostRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Map source memory
|
||||
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
|
||||
if (src == NULL) {
|
||||
@@ -82,7 +82,8 @@ bool HostBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
size_t startLayer = origin[2];
|
||||
size_t numLayers = size[2];
|
||||
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
||||
@@ -148,7 +149,7 @@ bool HostBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
uint flags = 0;
|
||||
if (entire) {
|
||||
flags = Memory::CpuWriteOnly;
|
||||
@@ -173,7 +174,7 @@ bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory
|
||||
bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::BufferRect& hostRect,
|
||||
const amd::BufferRect& bufRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Map destination memory
|
||||
void* dst = dstMemory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
|
||||
if (dst == NULL) {
|
||||
@@ -203,7 +204,8 @@ bool HostBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMe
|
||||
|
||||
bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
uint flags = 0;
|
||||
if (entire) {
|
||||
flags = Memory::CpuWriteOnly;
|
||||
@@ -272,7 +274,8 @@ bool HostBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
|
||||
bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// Map source memory
|
||||
void* src = srcMemory.cpuMap(vDev_,
|
||||
// Overlap detection
|
||||
@@ -302,7 +305,8 @@ bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstM
|
||||
|
||||
bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// Map source memory
|
||||
void* src = srcMemory.cpuMap(vDev_,
|
||||
// Overlap detection
|
||||
@@ -340,7 +344,8 @@ bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory&
|
||||
bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
bool entire, size_t rowPitch, size_t slicePitch) const {
|
||||
bool entire, size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
size_t startLayer = srcOrigin[2];
|
||||
size_t numLayers = size[2];
|
||||
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
||||
@@ -405,7 +410,8 @@ bool HostBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memor
|
||||
bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
bool entire, size_t rowPitch, size_t slicePitch) const {
|
||||
bool entire, size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// Map source memory
|
||||
void* src = srcMemory.cpuMap(vDev_, Memory::CpuReadOnly);
|
||||
if (src == NULL) {
|
||||
@@ -469,7 +475,8 @@ bool HostBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memor
|
||||
|
||||
bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
size_t startLayer = srcOrigin[2];
|
||||
size_t numLayers = size[2];
|
||||
if (srcMemory.owner()->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
||||
|
||||
@@ -76,7 +76,9 @@ class BlitManager : public amd::HeapObject {
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -85,7 +87,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
@@ -95,7 +99,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -103,7 +109,9 @@ class BlitManager : public amd::HeapObject {
|
||||
Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -112,7 +120,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
@@ -122,7 +132,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -131,7 +143,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -140,7 +154,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -151,7 +167,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -162,7 +180,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
@@ -171,7 +191,9 @@ class BlitManager : public amd::HeapObject {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const = 0;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
@@ -248,7 +270,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -257,7 +281,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
@@ -267,7 +293,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -275,7 +303,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -284,7 +314,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
@@ -294,7 +326,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -303,7 +337,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -312,7 +348,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -323,7 +361,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -334,7 +374,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
@@ -343,7 +385,9 @@ class HostBlitManager : public device::BlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
|
||||
@@ -117,12 +117,12 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory**
|
||||
|
||||
bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableReadBuffer_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
} else {
|
||||
size_t srcSize = size[0];
|
||||
size_t offset = 0;
|
||||
@@ -206,12 +206,14 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableReadBufferRect_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
|
||||
copyMetadata);
|
||||
} else {
|
||||
Memory& xferBuf = dev().xferRead().acquire();
|
||||
|
||||
@@ -259,16 +261,16 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableReadImage_) {
|
||||
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
} else {
|
||||
//! @todo Add HW accelerated path
|
||||
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -327,14 +329,14 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
|
||||
|
||||
bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Use host copy if memory has direct access or it's persistent
|
||||
if (setup_.disableWriteBuffer_ ||
|
||||
(gpuMem(dstMemory).isHostMemDirectAccess() &&
|
||||
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
|
||||
gpuMem(dstMemory).isPersistentDirectMap()) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
} else {
|
||||
size_t dstSize = size[0];
|
||||
size_t offset = 0;
|
||||
@@ -416,14 +418,15 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::BufferRect& hostRect,
|
||||
const amd::BufferRect& bufRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Use host copy if memory has direct access or it's persistent
|
||||
if (setup_.disableWriteBufferRect_ ||
|
||||
(dstMemory.isHostMemDirectAccess() &&
|
||||
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
|
||||
gpuMem(dstMemory).isPersistentDirectMap()) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
|
||||
copyMetadata);
|
||||
} else {
|
||||
Memory& xferBuf = gpu().xferWrite().Acquire(std::min(gpu().xferWrite().MaxSize(), size[0]));
|
||||
|
||||
@@ -473,15 +476,16 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
|
||||
bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
if (setup_.disableWriteImage_) {
|
||||
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
} else {
|
||||
//! @todo Add HW accelerated path
|
||||
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -489,12 +493,14 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
|
||||
bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
if (setup_.disableCopyBuffer_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
|
||||
!dev().settings().apuSystem_ && gpuMem(dstMemory).isHostMemDirectAccess())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size);
|
||||
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false,
|
||||
copyMetadata);
|
||||
} else {
|
||||
return gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin, size, gpuMem(dstMemory));
|
||||
}
|
||||
@@ -504,12 +510,14 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
|
||||
|
||||
bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
if (setup_.disableCopyBufferRect_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
|
||||
gpuMem(dstMemory).isHostMemDirectAccess())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire);
|
||||
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire,
|
||||
copyMetadata);
|
||||
} else {
|
||||
size_t srcOffset;
|
||||
size_t dstOffset;
|
||||
@@ -580,12 +588,12 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
|
||||
bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire, size_t rowPitch,
|
||||
size_t slicePitch) const {
|
||||
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
|
||||
bool result = false;
|
||||
if (setup_.disableCopyImageToBuffer_) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
} else {
|
||||
// Use PAL path for a transfer
|
||||
result =
|
||||
@@ -595,7 +603,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
|
||||
if (completeOperation_ && !result) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -605,12 +613,12 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
|
||||
bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire, size_t rowPitch,
|
||||
size_t slicePitch) const {
|
||||
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
|
||||
bool result = false;
|
||||
if (setup_.disableCopyBufferToImage_) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
} else {
|
||||
// Use PAL path for a transfer
|
||||
result =
|
||||
@@ -620,7 +628,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
if (completeOperation_ && !result) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -629,16 +637,19 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
|
||||
bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
bool result = false;
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
|
||||
if (setup_.disableCopyImage_) {
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
} else {
|
||||
//! @todo Add HW accelerated path
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -785,7 +796,8 @@ const uint RejectedFormatChannelTotal = sizeof(RejectedOrder) / sizeof(FormatCon
|
||||
bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
bool entire, size_t rowPitch, size_t slicePitch) const {
|
||||
bool entire, size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
static const bool CopyRect = false;
|
||||
@@ -796,7 +808,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (setup_.disableCopyBufferToImage_) {
|
||||
result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -808,7 +820,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
// but there are restrictions with older hardware
|
||||
if (dev().settings().imageDMA_) {
|
||||
result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
if (result) {
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -899,7 +911,8 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
// Step 2. Initiate compute transfer with all staging buffers
|
||||
for (uint i = 0; i < MaxXferBuffers; ++i) {
|
||||
if (copySize > 0) {
|
||||
if (!copyBufferToImageKernel(*xferBuf[i], dstMemory, xferSrc, dst, xferRect, false)) {
|
||||
if (!copyBufferToImageKernel(*xferBuf[i], dstMemory, xferSrc, dst, xferRect, false,
|
||||
0UL, 0UL, copyMetadata)) {
|
||||
transfer = false;
|
||||
break;
|
||||
}
|
||||
@@ -941,7 +954,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (!result) {
|
||||
result = copyBufferToImageKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
rowPitch, slicePitch);
|
||||
rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
@@ -1045,7 +1058,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire,
|
||||
size_t rowPitch, size_t slicePitch) const {
|
||||
size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
bool rejected = false;
|
||||
Memory* dstView = &gpuMem(dstMemory);
|
||||
bool releaseView = false;
|
||||
@@ -1084,7 +1098,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
// Fall into the host path if the image format was rejected
|
||||
if (rejected) {
|
||||
return HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire);
|
||||
entire, 0UL, 0UL, copyMetadata);
|
||||
}
|
||||
|
||||
// Use a common blit type with three dimensions by default
|
||||
@@ -1183,7 +1197,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
bool entire, size_t rowPitch, size_t slicePitch) const {
|
||||
bool entire, size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
static const bool CopyRect = false;
|
||||
@@ -1194,7 +1209,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (setup_.disableCopyImageToBuffer_) {
|
||||
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1209,7 +1224,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
if (dev().settings().imageDMA_ &&
|
||||
gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical) {
|
||||
result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
if (result) {
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -1280,7 +1295,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
for (uint i = 0; i < MaxXferBuffers; ++i) {
|
||||
if (copySizeTmp > 0) {
|
||||
if (!copyImageToBufferKernel(srcMemory, *xferBuf[i], srcTmp, xferDst, xferRectTmp,
|
||||
false)) {
|
||||
false, 0UL, 0UL, copyMetadata)) {
|
||||
transfer = false;
|
||||
break;
|
||||
}
|
||||
@@ -1352,7 +1367,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (!result) {
|
||||
result = copyImageToBufferKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
rowPitch, slicePitch);
|
||||
rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
@@ -1365,7 +1380,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire,
|
||||
size_t rowPitch, size_t slicePitch) const {
|
||||
size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
bool rejected = false;
|
||||
Memory* srcView = &gpuMem(srcMemory);
|
||||
bool releaseView = false;
|
||||
@@ -1404,7 +1420,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
// Fall into the host path if the image format was rejected
|
||||
if (rejected) {
|
||||
return HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire);
|
||||
entire, 0UL, 0UL, copyMetadata);
|
||||
}
|
||||
|
||||
uint blitType = BlitCopyImageToBuffer;
|
||||
@@ -1506,7 +1522,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
|
||||
bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool rejected = false;
|
||||
Memory* srcView = &gpuMem(srcMemory);
|
||||
@@ -1553,7 +1570,8 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
|
||||
// Fall into the host path for the entire 2D copy or
|
||||
// if the image format was rejected
|
||||
if (rejected) {
|
||||
result = HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
result = HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1663,7 +1681,8 @@ void FindPinSize(size_t& pinSize, const amd::Coord3D& size, size_t& rowPitch, si
|
||||
|
||||
bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1673,7 +1692,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result =
|
||||
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
|
||||
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1686,7 +1706,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
if (amdMemory == NULL) {
|
||||
// Force SW copy
|
||||
result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1699,7 +1719,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
// Copy image to buffer
|
||||
result = copyImageToBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, rowPitch,
|
||||
slicePitch);
|
||||
slicePitch, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -1712,7 +1732,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1722,7 +1743,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result =
|
||||
HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
|
||||
HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1738,7 +1760,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1756,7 +1778,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
|
||||
// Copy buffer to image
|
||||
result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch,
|
||||
slicePitch);
|
||||
slicePitch, copyMetadata);
|
||||
|
||||
if (pinned) {
|
||||
// Add pinned memory for a later release
|
||||
@@ -1774,7 +1796,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::BufferRect& srcRectIn,
|
||||
const amd::BufferRect& dstRectIn, const amd::Coord3D& sizeIn,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
bool rejected = false;
|
||||
@@ -1784,7 +1806,8 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
|
||||
gpuMem(dstMemory).isHostMemDirectAccess()) {
|
||||
if (!dev().settings().disableSdma_) {
|
||||
result =
|
||||
DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire);
|
||||
DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire,
|
||||
copyMetadata);
|
||||
}
|
||||
if (result) {
|
||||
synchronize();
|
||||
@@ -1888,7 +1911,7 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
|
||||
|
||||
bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1897,7 +1920,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1909,7 +1932,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
if (amdMemory == NULL) {
|
||||
// Force SW copy
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1921,12 +1944,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
Memory* dstMemory = dev().getGpuMemory(amdMemory);
|
||||
|
||||
// Copy buffer to buffer
|
||||
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire);
|
||||
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
} else {
|
||||
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1938,7 +1961,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::BufferRect& bufRect,
|
||||
const amd::BufferRect& hostRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1947,7 +1970,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1957,7 +1981,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
if (amdMemory == NULL) {
|
||||
// Force SW copy
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1973,7 +1998,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
Memory* dstMemory = dev().getGpuMemory(amdMemory);
|
||||
|
||||
// Copy buffer rect
|
||||
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire);
|
||||
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire,
|
||||
copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -1986,7 +2012,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1997,7 +2023,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
(gpuMem(dstMemory).memoryType() == Resource::Persistent)) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -2010,7 +2036,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
|
||||
if (amdMemory == NULL) {
|
||||
// Force SW copy
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -2022,12 +2048,12 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
Memory* srcMemory = dev().getGpuMemory(amdMemory);
|
||||
|
||||
// Copy buffer
|
||||
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire);
|
||||
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
} else {
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2039,7 +2065,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::BufferRect& hostRect,
|
||||
const amd::BufferRect& bufRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -2050,7 +2076,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
gpuMem(dstMemory).isPersistentDirectMap()) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -2061,7 +2088,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
if (amdMemory == NULL) {
|
||||
// Force SW copy
|
||||
result =
|
||||
HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -2080,7 +2108,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
rect.end_ = hostRect.end_;
|
||||
|
||||
// Copy buffer rect
|
||||
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire);
|
||||
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire,
|
||||
copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -2172,7 +2201,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
|
||||
bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& sizeIn, bool entire) const {
|
||||
const amd::Coord3D& sizeIn, bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -2246,7 +2275,7 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
|
||||
address parameters = kernels_[blitType]->parameters().values();
|
||||
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters);
|
||||
} else {
|
||||
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
|
||||
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire, copyMetadata);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
|
||||
@@ -57,7 +57,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -66,7 +68,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
@@ -76,7 +80,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -84,7 +90,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -93,7 +101,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
@@ -103,7 +113,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -112,7 +124,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -121,7 +135,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -132,7 +148,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -143,7 +161,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
@@ -152,7 +172,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Stream memory write operation - Write a 'value' at 'memory'.
|
||||
@@ -269,7 +291,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::BufferRect& srcRectIn, //!< Source rectangle
|
||||
const amd::BufferRect& dstRectIn, //!< Destination rectangle
|
||||
const amd::Coord3D& sizeIn, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -277,7 +301,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -286,7 +312,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -294,7 +322,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -303,7 +333,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -312,7 +344,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -323,7 +357,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -334,7 +370,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
@@ -343,7 +381,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
@@ -353,7 +393,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
@@ -363,7 +405,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
@@ -433,7 +477,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -444,7 +490,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Creates a program for all blit operations
|
||||
|
||||
@@ -1200,7 +1200,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
|
||||
// Accelerated transfer without pinning
|
||||
amd::Coord3D dstOrigin(offset);
|
||||
result = blitMgr().copyBuffer(*memory, *hostMemory, origin, dstOrigin, size,
|
||||
vcmd.isEntireMemory());
|
||||
vcmd.isEntireMemory(), vcmd.copyMetadata());
|
||||
} else {
|
||||
// The logic below will perform 2 step copy to make sure memory pinning doesn't
|
||||
// occur on the first unaligned page, because in Windows memory manager can
|
||||
@@ -1218,11 +1218,11 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
|
||||
}
|
||||
// Make first step transfer
|
||||
if (partial > 0) {
|
||||
result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial);
|
||||
result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial, false, vcmd.copyMetadata());
|
||||
}
|
||||
// Second step transfer if something left to copy
|
||||
if (partial < size[0]) {
|
||||
result &= blitMgr().readBuffer(*memory, tmpHost, origin[0] + partial, size[0] - partial);
|
||||
result &= blitMgr().readBuffer(*memory, tmpHost, origin[0] + partial, size[0] - partial, false, vcmd.copyMetadata());
|
||||
}
|
||||
}
|
||||
if (nullptr != bufferFromImage) {
|
||||
@@ -1237,10 +1237,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
|
||||
vcmd.hostRect().slicePitch_);
|
||||
if (hostMemory != nullptr) {
|
||||
result = blitMgr().copyBufferRect(*memory, *hostMemory, vcmd.bufRect(), hostbufferRect,
|
||||
vcmd.size(), vcmd.isEntireMemory());
|
||||
vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().readBufferRect(*memory, vcmd.destination(), vcmd.bufRect(),
|
||||
vcmd.hostRect(), vcmd.size(), vcmd.isEntireMemory());
|
||||
vcmd.hostRect(), vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
|
||||
}
|
||||
} break;
|
||||
case CL_COMMAND_READ_IMAGE:
|
||||
@@ -1255,7 +1255,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
|
||||
// Copy memory from the original image buffer into the backing store image
|
||||
result = blitMgr().copyBufferToImage(*buffer, *imageBuffer->CopyImageBuffer(), offs,
|
||||
offs, image->getRegion(), true,
|
||||
image->getRowPitch(), image->getSlicePitch());
|
||||
image->getRowPitch(), image->getSlicePitch(), vcmd.copyMetadata());
|
||||
}
|
||||
}
|
||||
if (hostMemory != nullptr) {
|
||||
@@ -1263,10 +1263,12 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
|
||||
amd::Coord3D dstOrigin(offset);
|
||||
result =
|
||||
blitMgr().copyImageToBuffer(*memory, *hostMemory, vcmd.origin(), dstOrigin, vcmd.size(),
|
||||
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch());
|
||||
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch(),
|
||||
vcmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().readImage(*memory, vcmd.destination(), vcmd.origin(), vcmd.size(),
|
||||
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory());
|
||||
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory(),
|
||||
vcmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -1331,7 +1333,7 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
|
||||
// Accelerated transfer without pinning
|
||||
amd::Coord3D srcOrigin(offset);
|
||||
result = blitMgr().copyBuffer(*hostMemory, *memory, srcOrigin, origin, size,
|
||||
vcmd.isEntireMemory());
|
||||
vcmd.isEntireMemory(), vcmd.copyMetadata());
|
||||
} else {
|
||||
// The logic below will perform 2 step copy to make sure memory pinning doesn't
|
||||
// occur on the first unaligned page, because in Windows memory manager can
|
||||
@@ -1349,11 +1351,11 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
|
||||
}
|
||||
// Make first step transfer
|
||||
if (partial > 0) {
|
||||
result = blitMgr().writeBuffer(vcmd.source(), *memory, origin, partial);
|
||||
result = blitMgr().writeBuffer(vcmd.source(), *memory, origin, partial, false, vcmd.copyMetadata());
|
||||
}
|
||||
// Second step transfer if something left to copy
|
||||
if (partial < size[0]) {
|
||||
result &= blitMgr().writeBuffer(tmpHost, *memory, origin[0] + partial, size[0] - partial);
|
||||
result &= blitMgr().writeBuffer(tmpHost, *memory, origin[0] + partial, size[0] - partial, false, vcmd.copyMetadata());
|
||||
}
|
||||
}
|
||||
if (nullptr != bufferFromImage) {
|
||||
@@ -1368,10 +1370,10 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
|
||||
vcmd.hostRect().slicePitch_);
|
||||
if (hostMemory != nullptr) {
|
||||
result = blitMgr().copyBufferRect(*hostMemory, *memory, hostbufferRect, vcmd.bufRect(),
|
||||
vcmd.size(), vcmd.isEntireMemory());
|
||||
vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().writeBufferRect(vcmd.source(), *memory, vcmd.hostRect(), vcmd.bufRect(),
|
||||
vcmd.size(), vcmd.isEntireMemory());
|
||||
vcmd.size(), vcmd.isEntireMemory(), vcmd.copyMetadata());
|
||||
}
|
||||
} break;
|
||||
case CL_COMMAND_WRITE_IMAGE:
|
||||
@@ -1380,10 +1382,12 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
|
||||
amd::Coord3D srcOrigin(offset);
|
||||
result =
|
||||
blitMgr().copyBufferToImage(*hostMemory, *memory, srcOrigin, vcmd.origin(), vcmd.size(),
|
||||
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch());
|
||||
vcmd.isEntireMemory(), vcmd.rowPitch(), vcmd.slicePitch(),
|
||||
vcmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().writeImage(vcmd.source(), *memory, vcmd.origin(), vcmd.size(),
|
||||
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory());
|
||||
vcmd.rowPitch(), vcmd.slicePitch(), vcmd.isEntireMemory(),
|
||||
vcmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -1404,7 +1408,8 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& vcmd) {
|
||||
bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memory& dstMem,
|
||||
bool entire, const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect) {
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
|
||||
amd::CopyMetadata copyMetadata) {
|
||||
// Translate memory references and ensure cache up-to-date
|
||||
pal::Memory* dstMemory = dev().getGpuMemory(&dstMem);
|
||||
pal::Memory* srcMemory = dev().getGpuMemory(&srcMem);
|
||||
@@ -1464,7 +1469,7 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo
|
||||
}
|
||||
|
||||
result = blitMgr().copyBuffer(*srcMemory, *dstMemory, realSrcOrigin, realDstOrigin, realSize,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
|
||||
if (nullptr != bufferFromImageSrc) {
|
||||
bufferFromImageSrc->release();
|
||||
@@ -1474,18 +1479,18 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo
|
||||
}
|
||||
} break;
|
||||
case CL_COMMAND_COPY_BUFFER_RECT:
|
||||
result = blitMgr().copyBufferRect(*srcMemory, *dstMemory, srcRect, dstRect, size, entire);
|
||||
result = blitMgr().copyBufferRect(*srcMemory, *dstMemory, srcRect, dstRect, size, entire, copyMetadata);
|
||||
break;
|
||||
case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
|
||||
result =
|
||||
blitMgr().copyImageToBuffer(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
blitMgr().copyImageToBuffer(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, 0UL, 0UL, copyMetadata);
|
||||
break;
|
||||
case CL_COMMAND_COPY_BUFFER_TO_IMAGE:
|
||||
result =
|
||||
blitMgr().copyBufferToImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
blitMgr().copyBufferToImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, 0UL, 0UL, copyMetadata);
|
||||
break;
|
||||
case CL_COMMAND_COPY_IMAGE:
|
||||
result = blitMgr().copyImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
result = blitMgr().copyImage(*srcMemory, *dstMemory, srcOrigin, dstOrigin, size, entire, copyMetadata);
|
||||
break;
|
||||
default:
|
||||
LogError("Unsupported command type for memory copy!");
|
||||
@@ -1512,7 +1517,7 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand& vcmd) {
|
||||
bool entire = vcmd.isEntireMemory();
|
||||
|
||||
if (!copyMemory(type, vcmd.source(), vcmd.destination(), entire, vcmd.srcOrigin(),
|
||||
vcmd.dstOrigin(), vcmd.size(), vcmd.srcRect(), vcmd.dstRect())) {
|
||||
vcmd.dstOrigin(), vcmd.size(), vcmd.srcRect(), vcmd.dstRect(), vcmd.copyMetadata())) {
|
||||
vcmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
|
||||
@@ -630,7 +630,9 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
const amd::Coord3D& dstOrigin, //!< destination memory object
|
||||
const amd::Coord3D& size, //!< copy size
|
||||
const amd::BufferRect& srcRect, //!< region of source for copy
|
||||
const amd::BufferRect& dstRect //!< region of destination for copy
|
||||
const amd::BufferRect& dstRect, //!< region of destination for copy
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
);
|
||||
|
||||
void PrintChildren(const HSAILKernel& hsaKernel, //!< The parent HSAIL kernel
|
||||
|
||||
@@ -61,7 +61,7 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory&
|
||||
|
||||
bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation
|
||||
gpu().releaseGpuMemoryFence(kSkipCpuWait);
|
||||
|
||||
@@ -70,7 +70,7 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().Barriers().WaitCurrent();
|
||||
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
} else {
|
||||
size_t srcSize = size[0];
|
||||
size_t offset = 0;
|
||||
@@ -150,7 +150,8 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
@@ -159,7 +160,7 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().Barriers().WaitCurrent();
|
||||
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, copyMetadata);
|
||||
} else {
|
||||
Memory& xferBuf = dev().xferRead().acquire();
|
||||
address staging = xferBuf.getDeviceMemory();
|
||||
@@ -190,17 +191,17 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableReadImage_) {
|
||||
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
} else {
|
||||
//! @todo Add HW accelerated path
|
||||
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -222,13 +223,13 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
|
||||
|
||||
bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() ||
|
||||
gpuMem(dstMemory).IsPersistentDirectMap()) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
} else {
|
||||
// HSA copy functionality with a possible async operation
|
||||
gpu().releaseGpuMemoryFence(kSkipCpuWait);
|
||||
@@ -314,14 +315,14 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::BufferRect& hostRect,
|
||||
const amd::BufferRect& bufRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() ||
|
||||
gpuMem(dstMemory).IsPersistentDirectMap()) {
|
||||
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, copyMetadata);
|
||||
} else {
|
||||
Memory& xferBuf = dev().xferWrite().acquire();
|
||||
address staging = xferBuf.getDeviceMemory();
|
||||
@@ -352,17 +353,18 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
|
||||
bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableWriteImage_) {
|
||||
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
} else {
|
||||
//! @todo Add HW accelerated path
|
||||
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -370,13 +372,13 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
|
||||
bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
if (setup_.disableCopyBuffer_ ||
|
||||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
|
||||
(dev().agent_profile() != HSA_PROFILE_FULL) && dstMemory.isHostMemDirectAccess())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size);
|
||||
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false, copyMetadata);
|
||||
} else {
|
||||
return hsaCopy(gpuMem(srcMemory), gpuMem(dstMemory), srcOrigin, dstOrigin, size);
|
||||
}
|
||||
@@ -387,13 +389,14 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
|
||||
// ================================================================================================
|
||||
bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
if (setup_.disableCopyBufferRect_ ||
|
||||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
|
||||
dstMemory.isHostMemDirectAccess())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire);
|
||||
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire, copyMetadata);
|
||||
} else {
|
||||
gpu().releaseGpuMemoryFence(kSkipCpuWait);
|
||||
|
||||
@@ -500,7 +503,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
|
||||
bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire, size_t rowPitch,
|
||||
size_t slicePitch) const {
|
||||
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation, hence make sure GPU is done
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
@@ -508,7 +511,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
|
||||
|
||||
if (setup_.disableCopyImageToBuffer_) {
|
||||
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
} else {
|
||||
Image& srcImage = static_cast<roc::Image&>(srcMemory);
|
||||
Buffer& dstBuffer = static_cast<roc::Buffer&>(dstMemory);
|
||||
@@ -534,7 +537,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
|
||||
// Check if a HostBlit transfer is required
|
||||
if (completeOperation_ && !result) {
|
||||
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -544,7 +547,7 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
|
||||
bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire, size_t rowPitch,
|
||||
size_t slicePitch) const {
|
||||
size_t slicePitch, amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation, hence make sure GPU is done
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
@@ -552,7 +555,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
|
||||
if (setup_.disableCopyBufferToImage_) {
|
||||
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
} else {
|
||||
Buffer& srcBuffer = static_cast<roc::Buffer&>(srcMemory);
|
||||
Image& dstImage = static_cast<roc::Image&>(dstMemory);
|
||||
@@ -579,7 +582,7 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
// Check if a HostBlit transfer is required
|
||||
if (completeOperation_ && !result) {
|
||||
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -588,17 +591,20 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
|
||||
bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
// HSA copy functionality with a possible async operation, hence make sure GPU is done
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
bool result = false;
|
||||
|
||||
if (setup_.disableCopyImage_) {
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
} else {
|
||||
//! @todo Add HW accelerated path
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -878,7 +884,8 @@ const uint RejectedFormatChannelTotal = sizeof(RejectedOrder) / sizeof(FormatCon
|
||||
bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
bool entire, size_t rowPitch, size_t slicePitch) const {
|
||||
bool entire, size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -890,7 +897,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (setup_.disableCopyBufferToImage_) {
|
||||
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -902,7 +909,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
// but there are restrictions with older hardware
|
||||
if (dev().settings().imageDMA_) {
|
||||
result = DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
if (result) {
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -912,7 +919,7 @@ bool KernelBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (!result) {
|
||||
result = copyBufferToImageKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
rowPitch, slicePitch);
|
||||
rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
@@ -949,7 +956,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire,
|
||||
size_t rowPitch, size_t slicePitch) const {
|
||||
size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -995,7 +1003,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
// Fall into the host path if the image format was rejected
|
||||
if (rejected) {
|
||||
return DmaBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
|
||||
// Use a common blit type with three dimensions by default
|
||||
@@ -1096,7 +1104,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
bool entire, size_t rowPitch, size_t slicePitch) const {
|
||||
bool entire, size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -1108,7 +1117,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (setup_.disableCopyImageToBuffer_) {
|
||||
result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1122,7 +1131,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
// single step SDMA is causing corruption and the cause is under investigation
|
||||
if (dev().settings().imageDMA_) {
|
||||
result = DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
if (result) {
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -1132,7 +1141,7 @@ bool KernelBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Mem
|
||||
|
||||
if (!result) {
|
||||
result = copyImageToBufferKernel(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
rowPitch, slicePitch);
|
||||
rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
@@ -1145,7 +1154,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire,
|
||||
size_t rowPitch, size_t slicePitch) const {
|
||||
size_t rowPitch, size_t slicePitch,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -1190,7 +1200,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
// Fall into the host path if the image format was rejected
|
||||
if (rejected) {
|
||||
return DmaBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
entire, rowPitch, slicePitch);
|
||||
entire, rowPitch, slicePitch, copyMetadata);
|
||||
}
|
||||
|
||||
uint blitType = BlitCopyImageToBuffer;
|
||||
@@ -1296,7 +1306,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
|
||||
bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
const amd::Coord3D& size, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -1348,7 +1359,8 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
|
||||
// Fall into the host path for the entire 2D copy or
|
||||
// if the image format was rejected
|
||||
if (rejected) {
|
||||
result = DmaBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
result = DmaBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1460,7 +1472,8 @@ void FindPinSize(size_t& pinSize, const amd::Coord3D& size, size_t& rowPitch, si
|
||||
|
||||
bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -1471,7 +1484,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
if (setup_.disableReadImage_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
|
||||
result = HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1484,7 +1498,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result =
|
||||
DmaBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
|
||||
DmaBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1497,7 +1512,7 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
// Copy image to buffer
|
||||
result = copyImageToBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, rowPitch,
|
||||
slicePitch);
|
||||
slicePitch, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -1510,7 +1525,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
size_t rowPitch, size_t slicePitch, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
|
||||
guarantee((dev().info().imageSupport_ != false), "Image not supported on this device");
|
||||
|
||||
@@ -1521,7 +1537,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
if (setup_.disableWriteImage_ || dstMemory.isHostMemDirectAccess()) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
|
||||
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1534,7 +1551,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result = DmaBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1547,7 +1564,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
|
||||
// Copy buffer to image
|
||||
result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch,
|
||||
slicePitch);
|
||||
slicePitch, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -1561,7 +1578,7 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::BufferRect& srcRectIn,
|
||||
const amd::BufferRect& dstRectIn, const amd::Coord3D& sizeIn,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
bool rejected = false;
|
||||
@@ -1569,7 +1586,8 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
|
||||
// Fall into the ROC path for rejected transfers
|
||||
if (dev().info().pcie_atomics_ && (setup_.disableCopyBufferRect_ ||
|
||||
srcMemory.isHostMemDirectAccess() || dstMemory.isHostMemDirectAccess())) {
|
||||
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire);
|
||||
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory, srcRectIn, dstRectIn, sizeIn, entire,
|
||||
copyMetadata);
|
||||
|
||||
if (result) {
|
||||
synchronize();
|
||||
@@ -1686,7 +1704,7 @@ bool KernelBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1716,7 +1734,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
if (setup_.disableReadBuffer_ || (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1728,7 +1746,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1740,12 +1758,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
Memory* dstMemory = dev().getRocMemory(amdMemory);
|
||||
|
||||
// Copy buffer to buffer
|
||||
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire);
|
||||
result = copyBuffer(srcMemory, *dstMemory, origin, dstOrigin, size, entire, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
} else {
|
||||
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
result = DmaBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1758,7 +1776,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::BufferRect& bufRect,
|
||||
const amd::BufferRect& hostRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1767,7 +1785,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1777,7 +1796,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result = DmaBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
result = DmaBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1793,7 +1813,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
Memory* dstMemory = dev().getRocMemory(amdMemory);
|
||||
|
||||
// Copy buffer rect
|
||||
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire);
|
||||
result = copyBufferRect(srcMemory, *dstMemory, bufRect, rect, size, entire, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -1807,7 +1827,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1832,7 +1852,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
gpuMem(dstMemory).IsPersistentDirectMap()) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1845,7 +1865,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1857,12 +1877,12 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
Memory* srcMemory = dev().getRocMemory(amdMemory);
|
||||
|
||||
// Copy buffer rect
|
||||
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire);
|
||||
result = copyBuffer(*srcMemory, dstMemory, srcOrigin, origin, size, entire, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
} else {
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
result = DmaBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1875,7 +1895,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::BufferRect& hostRect,
|
||||
const amd::BufferRect& bufRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
bool entire, amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1884,7 +1904,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
gpuMem(dstMemory).IsPersistentDirectMap()) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -1894,7 +1915,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
|
||||
if (amdMemory == nullptr) {
|
||||
// Force DMA copy with staging
|
||||
result = DmaBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
result = DmaBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
|
||||
copyMetadata);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
@@ -1913,7 +1935,7 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
rect.end_ = hostRect.end_;
|
||||
|
||||
// Copy buffer rect
|
||||
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire);
|
||||
result = copyBufferRect(*srcMemory, dstMemory, rect, bufRect, size, entire, copyMetadata);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
@@ -2151,7 +2173,8 @@ bool KernelBlitManager::fillBuffer3D(device::Memory& memory, const void* pattern
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& sizeIn, bool entire) const {
|
||||
const amd::Coord3D& sizeIn, bool entire,
|
||||
amd::CopyMetadata copyMetadata) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
bool p2p = (&gpuMem(srcMemory).dev() != &gpuMem(dstMemory).dev()) &&
|
||||
@@ -2247,7 +2270,8 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
|
||||
gpu().SetCopyCommandType(CL_COMMAND_READ_BUFFER);
|
||||
}
|
||||
}
|
||||
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
|
||||
result = DmaBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire,
|
||||
copyMetadata);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
|
||||
@@ -59,7 +59,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -68,7 +70,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
@@ -78,7 +82,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -86,7 +92,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -95,7 +103,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
@@ -105,7 +115,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -114,7 +126,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
@@ -123,7 +137,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -134,7 +150,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -145,7 +163,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
@@ -154,7 +174,9 @@ class DmaBlitManager : public device::HostBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Stream memory write operation - Write a 'value' at 'memory'.
|
||||
@@ -290,7 +312,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::BufferRect& srcRectIn, //!< Source rectangle
|
||||
const amd::BufferRect& dstRectIn, //!< Destination rectangle
|
||||
const amd::Coord3D& sizeIn, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -298,7 +322,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
@@ -307,7 +333,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -315,7 +343,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
@@ -324,7 +354,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -333,7 +365,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
@@ -344,7 +378,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -355,7 +391,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
@@ -364,7 +402,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
@@ -374,7 +414,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
@@ -384,7 +426,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata()//!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
@@ -481,7 +525,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
@@ -492,7 +538,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
size_t slicePitch = 0, //!< Slice for buffer
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
) const;
|
||||
|
||||
//! Creates a program for all blit operations
|
||||
|
||||
@@ -1534,9 +1534,9 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) {
|
||||
// Accelerated transfer without pinning
|
||||
amd::Coord3D dstOrigin(offset);
|
||||
result = blitMgr().copyBuffer(*devMem, *hostMemory, origin, dstOrigin, size,
|
||||
cmd.isEntireMemory());
|
||||
cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().readBuffer(*devMem, dst, origin, size, cmd.isEntireMemory());
|
||||
result = blitMgr().readBuffer(*devMem, dst, origin, size, cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1548,10 +1548,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) {
|
||||
cmd.hostRect().slicePitch_);
|
||||
if (hostMemory != nullptr) {
|
||||
result = blitMgr().copyBufferRect(*devMem, *hostMemory, cmd.bufRect(), hostbufferRect,
|
||||
size, cmd.isEntireMemory());
|
||||
size, cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().readBufferRect(*devMem, dst, cmd.bufRect(), cmd.hostRect(), size,
|
||||
cmd.isEntireMemory());
|
||||
cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1576,10 +1576,10 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& cmd) {
|
||||
amd::Coord3D dstOrigin(offset);
|
||||
result =
|
||||
blitMgr().copyImageToBuffer(*devMem, *hostMemory, cmd.origin(), dstOrigin, size,
|
||||
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch());
|
||||
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch(), cmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().readImage(*devMem, dst, cmd.origin(), size, cmd.rowPitch(),
|
||||
cmd.slicePitch(), cmd.isEntireMemory());
|
||||
cmd.slicePitch(), cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1641,9 +1641,9 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) {
|
||||
// Accelerated transfer without pinning
|
||||
amd::Coord3D srcOrigin(offset);
|
||||
result = blitMgr().copyBuffer(*hostMemory, *devMem, srcOrigin, origin, size,
|
||||
cmd.isEntireMemory());
|
||||
cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().writeBuffer(src, *devMem, origin, size, cmd.isEntireMemory());
|
||||
result = blitMgr().writeBuffer(src, *devMem, origin, size, cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1655,10 +1655,10 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) {
|
||||
cmd.hostRect().slicePitch_);
|
||||
if (hostMemory != nullptr) {
|
||||
result = blitMgr().copyBufferRect(*hostMemory, *devMem, hostbufferRect, cmd.bufRect(),
|
||||
size, cmd.isEntireMemory());
|
||||
size, cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().writeBufferRect(src, *devMem, cmd.hostRect(), cmd.bufRect(), size,
|
||||
cmd.isEntireMemory());
|
||||
cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1668,10 +1668,11 @@ void VirtualGPU::submitWriteMemory(amd::WriteMemoryCommand& cmd) {
|
||||
amd::Coord3D srcOrigin(offset);
|
||||
result =
|
||||
blitMgr().copyBufferToImage(*hostMemory, *devMem, srcOrigin, cmd.origin(), size,
|
||||
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch());
|
||||
cmd.isEntireMemory(), cmd.rowPitch(), cmd.slicePitch(),
|
||||
cmd.copyMetadata());
|
||||
} else {
|
||||
result = blitMgr().writeImage(src, *devMem, cmd.origin(), size, cmd.rowPitch(),
|
||||
cmd.slicePitch(), cmd.isEntireMemory());
|
||||
cmd.slicePitch(), cmd.isEntireMemory(), cmd.copyMetadata());
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1752,7 +1753,8 @@ void VirtualGPU::submitSvmPrefetchAsync(amd::SvmPrefetchAsyncCommand& cmd) {
|
||||
bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memory& dstMem,
|
||||
bool entire, const amd::Coord3D& srcOrigin,
|
||||
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect) {
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
|
||||
amd::CopyMetadata copyMetadata) {
|
||||
Memory* srcDevMem = dev().getRocMemory(&srcMem);
|
||||
Memory* dstDevMem = dev().getRocMemory(&dstMem);
|
||||
|
||||
@@ -1796,23 +1798,28 @@ bool VirtualGPU::copyMemory(cl_command_type type, amd::Memory& srcMem, amd::Memo
|
||||
realSize.c[0] *= elemSize;
|
||||
}
|
||||
|
||||
result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, realSrcOrigin, realDstOrigin, realSize, entire);
|
||||
result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, realSrcOrigin, realDstOrigin,
|
||||
realSize, entire, copyMetadata);
|
||||
break;
|
||||
}
|
||||
case CL_COMMAND_COPY_BUFFER_RECT: {
|
||||
result = blitMgr().copyBufferRect(*srcDevMem, *dstDevMem, srcRect, dstRect, size, entire);
|
||||
result = blitMgr().copyBufferRect(*srcDevMem, *dstDevMem, srcRect, dstRect, size, entire,
|
||||
copyMetadata);
|
||||
break;
|
||||
}
|
||||
case CL_COMMAND_COPY_IMAGE: {
|
||||
result = blitMgr().copyImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire);
|
||||
result = blitMgr().copyImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire,
|
||||
copyMetadata);
|
||||
break;
|
||||
}
|
||||
case CL_COMMAND_COPY_IMAGE_TO_BUFFER: {
|
||||
result = blitMgr().copyImageToBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire);
|
||||
result = blitMgr().copyImageToBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire,
|
||||
0UL, 0UL, copyMetadata);
|
||||
break;
|
||||
}
|
||||
case CL_COMMAND_COPY_BUFFER_TO_IMAGE: {
|
||||
result = blitMgr().copyBufferToImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire);
|
||||
result = blitMgr().copyBufferToImage(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size, entire,
|
||||
0UL, 0UL, copyMetadata);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -1841,7 +1848,7 @@ void VirtualGPU::submitCopyMemory(amd::CopyMemoryCommand& cmd) {
|
||||
bool entire = cmd.isEntireMemory();
|
||||
|
||||
if (!copyMemory(type, cmd.source(), cmd.destination(), entire, cmd.srcOrigin(),
|
||||
cmd.dstOrigin(), cmd.size(), cmd.srcRect(), cmd.dstRect())) {
|
||||
cmd.dstOrigin(), cmd.size(), cmd.srcRect(), cmd.dstRect(), cmd.copyMetadata())) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
|
||||
@@ -470,7 +470,9 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
const amd::Coord3D& dstOrigin, //!< destination memory object
|
||||
const amd::Coord3D& size, //!< copy size
|
||||
const amd::BufferRect& srcRect, //!< region of source for copy
|
||||
const amd::BufferRect& dstRect //!< region of destination for copy
|
||||
const amd::BufferRect& dstRect, //!< region of destination for copy
|
||||
amd::CopyMetadata copyMetadata =
|
||||
amd::CopyMetadata() //!< Memory copy MetaData
|
||||
);
|
||||
|
||||
//! Updates AQL header for the upcoming dispatch
|
||||
|
||||
@@ -235,6 +235,25 @@ class Event : public RuntimeObject {
|
||||
void setEventScope(int32_t scope) { event_scope_ = scope; }
|
||||
};
|
||||
|
||||
/*! \brief Per-copy metadata handed from the API layer down to the copy path.
 *
 *  The two bit-fields and flags_ share the same storage, so flags_ gives a
 *  single-word view of every flag. Both constructors leave all bits that are
 *  not explicitly set at zero, which keeps flags_ well defined for every
 *  object regardless of how it was built.
 */
union CopyMetadata {
  //! Copy engine the requester would like to be used for the transfer
  enum CopyEnginePreference {
    NONE = 0,
    BLIT = 1,
    SDMA = 2,
    CPDMA = 3
  };

  struct {
    uint32_t isAsync_ : 1;               //!< Async flag supplied at construction
    uint32_t copyEnginePreference_ : 2;  //!< One of CopyEnginePreference (values 0..3 fit in 2 bits)
  };
  uint32_t flags_;  //!< All flags viewed as one 32-bit word

  //! Default metadata: every flag cleared
  CopyMetadata() : flags_(0) {}

  //! Metadata with an explicit async flag and copy engine preference
  CopyMetadata(bool isAsync, CopyEnginePreference copyEnginePreference) : flags_(0) {
    // Clear the whole word first: the bit-fields cover only 3 of the 32 bits,
    // and initializing just them would leave the remaining bits of flags_
    // indeterminate.
    isAsync_ = isAsync;
    copyEnginePreference_ = copyEnginePreference;
  }
};
|
||||
|
||||
/*! \brief An operation that is submitted to a command queue.
|
||||
*
|
||||
* %Command is the abstract base type of all OpenCL operations
|
||||
@@ -463,18 +482,20 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
|
||||
|
||||
BufferRect bufRect_; //!< Buffer rectangle information
|
||||
BufferRect hostRect_; //!< Host memory rectangle information
|
||||
|
||||
amd::CopyMetadata copyMetadata_;
|
||||
public:
|
||||
//! Construct a new ReadMemoryCommand
|
||||
ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
|
||||
Memory& memory, Coord3D origin, Coord3D size, void* hostPtr,
|
||||
size_t rowPitch = 0, size_t slicePitch = 0)
|
||||
size_t rowPitch = 0, size_t slicePitch = 0,
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
|
||||
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
||||
origin_(origin),
|
||||
size_(size),
|
||||
hostPtr_(hostPtr),
|
||||
rowPitch_(rowPitch),
|
||||
slicePitch_(slicePitch) {
|
||||
slicePitch_(slicePitch),
|
||||
copyMetadata_(copyMetadata) {
|
||||
// Sanity checks
|
||||
assert(hostPtr != NULL && "hostPtr cannot be null");
|
||||
assert(size.c[0] > 0 && "invalid");
|
||||
@@ -483,7 +504,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
|
||||
//! Construct a new ReadMemoryCommand
|
||||
ReadMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
|
||||
Memory& memory, Coord3D origin, Coord3D size, void* hostPtr,
|
||||
const BufferRect& bufRect, const BufferRect& hostRect)
|
||||
const BufferRect& bufRect, const BufferRect& hostRect,
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
|
||||
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
||||
origin_(origin),
|
||||
size_(size),
|
||||
@@ -491,7 +513,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
|
||||
rowPitch_(0),
|
||||
slicePitch_(0),
|
||||
bufRect_(bufRect),
|
||||
hostRect_(hostRect) {
|
||||
hostRect_(hostRect),
|
||||
copyMetadata_(copyMetadata) {
|
||||
// Sanity checks
|
||||
assert(hostPtr != NULL && "hostPtr cannot be null");
|
||||
assert(size.c[0] > 0 && "invalid");
|
||||
@@ -517,7 +540,8 @@ class ReadMemoryCommand : public OneMemoryArgCommand {
|
||||
const BufferRect& bufRect() const { return bufRect_; }
|
||||
//! Return the host rectangle information
|
||||
const BufferRect& hostRect() const { return hostRect_; }
|
||||
|
||||
//! Return the copy metadata
|
||||
amd::CopyMetadata copyMetadata() const { return copyMetadata_; }
|
||||
//! Updates the memory object to read from
|
||||
void setSource(Memory& memory) { memory_ = &memory; }
|
||||
//! Updates the host memory to write to
|
||||
@@ -580,17 +604,20 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
|
||||
|
||||
BufferRect bufRect_; //!< Buffer rectangle information
|
||||
BufferRect hostRect_; //!< Host memory rectangle information
|
||||
amd::CopyMetadata copyMetadata_;
|
||||
|
||||
public:
|
||||
WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
|
||||
Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr,
|
||||
size_t rowPitch = 0, size_t slicePitch = 0)
|
||||
size_t rowPitch = 0, size_t slicePitch = 0,
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
|
||||
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
||||
origin_(origin),
|
||||
size_(size),
|
||||
hostPtr_(hostPtr),
|
||||
rowPitch_(rowPitch),
|
||||
slicePitch_(slicePitch) {
|
||||
slicePitch_(slicePitch),
|
||||
copyMetadata_(copyMetadata){
|
||||
// Sanity checks
|
||||
assert(hostPtr != NULL && "hostPtr cannot be null");
|
||||
assert(size.c[0] > 0 && "invalid");
|
||||
@@ -598,7 +625,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
|
||||
|
||||
WriteMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
|
||||
Memory& memory, Coord3D origin, Coord3D size, const void* hostPtr,
|
||||
const BufferRect& bufRect, const BufferRect& hostRect)
|
||||
const BufferRect& bufRect, const BufferRect& hostRect,
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
|
||||
: OneMemoryArgCommand(queue, cmdType, eventWaitList, memory),
|
||||
origin_(origin),
|
||||
size_(size),
|
||||
@@ -606,7 +634,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
|
||||
rowPitch_(0),
|
||||
slicePitch_(0),
|
||||
bufRect_(bufRect),
|
||||
hostRect_(hostRect) {
|
||||
hostRect_(hostRect),
|
||||
copyMetadata_(copyMetadata){
|
||||
// Sanity checks
|
||||
assert(hostPtr != NULL && "hostPtr cannot be null");
|
||||
assert(size.c[0] > 0 && "invalid");
|
||||
@@ -632,7 +661,8 @@ class WriteMemoryCommand : public OneMemoryArgCommand {
|
||||
const BufferRect& bufRect() const { return bufRect_; }
|
||||
//! Return the host rectangle information
|
||||
const BufferRect& hostRect() const { return hostRect_; }
|
||||
|
||||
//! Return the copy metadata
|
||||
amd::CopyMetadata copyMetadata() const { return copyMetadata_; }
|
||||
//! Updates the host memory to read from
|
||||
void setSource(const void* hostPtr) { hostPtr_ = hostPtr; }
|
||||
//! Updates the host memory to write to
|
||||
@@ -831,28 +861,31 @@ class CopyMemoryCommand : public TwoMemoryArgsCommand {
|
||||
|
||||
BufferRect srcRect_; //!< Source buffer rectangle information
|
||||
BufferRect dstRect_; //!< Destination buffer rectangle information
|
||||
|
||||
amd::CopyMetadata copyMetadata_;
|
||||
public:
|
||||
CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
|
||||
Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin,
|
||||
Coord3D size)
|
||||
Coord3D size, amd::CopyMetadata copyMetadata = amd::CopyMetadata())
|
||||
: TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory),
|
||||
srcOrigin_(srcOrigin),
|
||||
dstOrigin_(dstOrigin),
|
||||
size_(size) {
|
||||
size_(size),
|
||||
copyMetadata_(copyMetadata){
|
||||
// Sanity checks
|
||||
assert(size.c[0] > 0 && "invalid");
|
||||
}
|
||||
|
||||
CopyMemoryCommand(HostQueue& queue, cl_command_type cmdType, const EventWaitList& eventWaitList,
|
||||
Memory& srcMemory, Memory& dstMemory, Coord3D srcOrigin, Coord3D dstOrigin,
|
||||
Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect)
|
||||
Coord3D size, const BufferRect& srcRect, const BufferRect& dstRect,
|
||||
amd::CopyMetadata copyMetadata = amd::CopyMetadata())
|
||||
: TwoMemoryArgsCommand(queue, cmdType, eventWaitList, srcMemory, dstMemory),
|
||||
srcOrigin_(srcOrigin),
|
||||
dstOrigin_(dstOrigin),
|
||||
size_(size),
|
||||
srcRect_(srcRect),
|
||||
dstRect_(dstRect) {
|
||||
dstRect_(dstRect),
|
||||
copyMetadata_(copyMetadata) {
|
||||
// Sanity checks
|
||||
assert(size.c[0] > 0 && "invalid");
|
||||
}
|
||||
@@ -875,7 +908,8 @@ class CopyMemoryCommand : public TwoMemoryArgsCommand {
|
||||
const BufferRect& srcRect() const { return srcRect_; }
|
||||
//! Return the destination buffer rectangle information
|
||||
const BufferRect& dstRect() const { return dstRect_; }
|
||||
|
||||
//! Return the copy metadata
|
||||
amd::CopyMetadata copyMetadata() const { return copyMetadata_; }
|
||||
//! Updates the memory object to read from
|
||||
void setSource(Memory& srcMemory) { memory1_ = &srcMemory; }
|
||||
//! Updates the memory object to write to.
|
||||
|
||||
Ссылка в новой задаче
Block a user