From 31c0525344f842bbf32f13e8fa70313f35cefda1 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Tue, 19 Oct 2021 13:00:51 -0400 Subject: [PATCH] SWDEV-305527 - Changes to handle memset blit kernel that takes width, height and depth. This also fixes SWDEV-317261. Change-Id: Ic85f63a95d9d8f48884fc8c7fd95cbb496dfbbca [ROCm/clr commit: 7fb80a027a6bdf6f816c594374a7d5f568e87c98] --- projects/clr/rocclr/device/blit.cpp | 4 +- projects/clr/rocclr/device/blit.hpp | 2 + projects/clr/rocclr/device/blitcl.cpp | 19 +++ projects/clr/rocclr/device/pal/palblit.cpp | 7 +- projects/clr/rocclr/device/pal/palblit.hpp | 1 + projects/clr/rocclr/device/pal/palprogram.cpp | 4 +- projects/clr/rocclr/device/pal/palvirtual.cpp | 5 +- projects/clr/rocclr/device/rocm/rocblit.cpp | 139 +++++++++++++++++- projects/clr/rocclr/device/rocm/rocblit.hpp | 47 +++++- .../clr/rocclr/device/rocm/rocvirtual.cpp | 42 ++---- .../clr/rocclr/device/rocm/rocvirtual.hpp | 1 + 11 files changed, 224 insertions(+), 47 deletions(-) diff --git a/projects/clr/rocclr/device/blit.cpp b/projects/clr/rocclr/device/blit.cpp index 4bd854219f..999b9be754 100644 --- a/projects/clr/rocclr/device/blit.cpp +++ b/projects/clr/rocclr/device/blit.cpp @@ -553,8 +553,8 @@ bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMe } bool HostBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize, - const amd::Coord3D& origin, const amd::Coord3D& size, bool entire, - bool forceBlit) const { + const amd::Coord3D& surface, const amd::Coord3D& origin, + const amd::Coord3D& size, bool entire, bool forceBlit) const { // Map memory void* fillMem = memory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0); if (fillMem == NULL) { diff --git a/projects/clr/rocclr/device/blit.hpp b/projects/clr/rocclr/device/blit.hpp index d9ac765550..794bfa15ca 100644 --- a/projects/clr/rocclr/device/blit.hpp +++ b/projects/clr/rocclr/device/blit.hpp @@ -178,6 +178,7 @@ class BlitManager : public amd::HeapObject { virtual bool fillBuffer(Memory& memory, //!< Memory object to fill with pattern const void* pattern, //!< Pattern data size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the fill region bool entire = false, //!< Entire buffer will be updated @@ -331,6 +332,7 @@ class HostBlitManager : public device::BlitManager { virtual bool fillBuffer(device::Memory& memory, //!< Memory object to fill with pattern const void* pattern, //!< Pattern data size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the fill region bool entire = false, //!< Entire buffer will be updated diff --git a/projects/clr/rocclr/device/blitcl.cpp b/projects/clr/rocclr/device/blitcl.cpp index 32bb5c9e90..22a98e70b6 100644 --- a/projects/clr/rocclr/device/blitcl.cpp +++ b/projects/clr/rocclr/device/blitcl.cpp @@ -27,6 +27,10 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( extern void __amd_fillBufferAligned(__global uchar*, __global ushort*, __global uint*, __global ulong*, __constant uchar*, uint, ulong, ulong); + extern void __amd_fillBufferAligned2D(__global uchar*, __global ushort*, __global uint*, + __global ulong*, __constant uchar*, uint, ulong, ulong, + ulong, ulong); + extern void __amd_copyBuffer(__global uchar*, __global uchar*, ulong, ulong, ulong, uint); extern void __amd_copyBufferAligned(__global uint*, __global uint*, ulong, ulong, ulong, uint); @@ -47,6 +51,21 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( pattern, patternSize, offset, size); } + __kernel void __amd_rocclr_fillBufferAligned2D(__global uchar* bufUChar, + __global ushort* bufUShort, + __global uint* bufUInt, + __global ulong* bufULong, + __constant uchar* pattern, + uint patternSize, + ulong offset, + ulong width, + ulong height, + ulong pitch) { + __amd_fillBufferAligned2D(bufUChar, bufUShort, bufUInt, bufULong, + pattern, patternSize, offset, width, height, + pitch); + } + __kernel void __amd_rocclr_copyBuffer(__global uchar* srcI, __global uchar* dstI, ulong srcOrigin, ulong dstOrigin, ulong size, uint remain) { diff --git a/projects/clr/rocclr/device/pal/palblit.cpp b/projects/clr/rocclr/device/pal/palblit.cpp index 96ea2e1a5a..0f2e842703 100644 --- a/projects/clr/rocclr/device/pal/palblit.cpp +++ b/projects/clr/rocclr/device/pal/palblit.cpp @@ -2101,8 +2101,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst } bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize, - const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire, bool forceBlit) const { + const amd::Coord3D& surface, const amd::Coord3D& origin, + const amd::Coord3D& size, bool entire, bool forceBlit) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -2110,7 +2110,8 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, if (setup_.disableFillBuffer_ || (!forceBlit && gpuMem(memory).isHostMemDirectAccess())) { gpu().releaseGpuMemoryFence(); - result = HostBlitManager::fillBuffer(memory, pattern, patternSize, origin, size, entire); + result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, + entire); synchronize(); return result; } else { diff --git a/projects/clr/rocclr/device/pal/palblit.hpp b/projects/clr/rocclr/device/pal/palblit.hpp index b8c1d59b3f..b31600eb06 100644 --- a/projects/clr/rocclr/device/pal/palblit.hpp +++ b/projects/clr/rocclr/device/pal/palblit.hpp @@ -344,6 +344,7 @@ class KernelBlitManager : public DmaBlitManager { virtual bool fillBuffer(device::Memory& memory, //!< Memory object to fill with pattern const void* pattern, //!< Pattern data size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the fill region bool entire = false, //!< Entire buffer will be updated diff --git a/projects/clr/rocclr/device/pal/palprogram.cpp b/projects/clr/rocclr/device/pal/palprogram.cpp index 76f89a0389..4b3c75d881 100644 --- a/projects/clr/rocclr/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/device/pal/palprogram.cpp @@ -117,8 +117,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t if ((cpuAccess_ == nullptr) && zero && !prog.isInternal()) { uint64_t pattern = 0; size_t patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1; - prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0), - amd::Coord3D(size)); + prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(size), + amd::Coord3D(0), amd::Coord3D(size)); } switch (segment) { diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 3e18ccadde..02a5339d39 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -810,7 +810,8 @@ bool VirtualGPU::createVirtualQueue(uint deviceQueueSize) { uint64_t pattern = 0; amd::Coord3D origin(0, 0, 0); amd::Coord3D region(virtualQueue_->size(), 0, 0); - if (!dev().xferMgr().fillBuffer(*virtualQueue_, &pattern, sizeof(pattern), origin, region)) { + if (!dev().xferMgr().fillBuffer(*virtualQueue_, &pattern, sizeof(pattern), region, origin, + region)) { return false; } @@ -1826,7 +1827,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const pattern = fillValue; patternSize = elemSize; } - result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, + result = blitMgr().fillBuffer(*memory, pattern, patternSize, realSize, realOrigin, realSize, amdMemory->isEntirelyCovered(origin, size), forceBlit); if (nullptr != bufferFromImage) { bufferFromImage->release(); diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index aa7e941908..b157705557 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -1941,8 +1941,35 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst // ================================================================================================ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize, - const amd::Coord3D& origin, const amd::Coord3D& size, - bool entire, bool forceBlit) const { + const amd::Coord3D& surface, const amd::Coord3D& origin, + const amd::Coord3D& size, bool entire, bool forceBlit) const { + + guarantee(size[0] > 0 && size[1] > 0 && size[2] > 0, "Dimension cannot be 0"); + + if (size[1] == 1 && size[2] == 1) { + return fillBuffer1D(memory, pattern, patternSize, surface, origin, size, entire, forceBlit); + } else if (size[2] == 1) { + return fillBuffer2D(memory, pattern, patternSize, surface, origin, size, entire, forceBlit); + } else { + bool ret_val = true; + amd::Coord3D my_origin(origin); + amd::Coord3D my_region{surface[1], surface[2], size[2]}; + amd::BufferRect rect; + rect.create(static_cast(my_origin), static_cast(my_region), surface[0], 0); + for (size_t slice = 0; slice < size[2]; ++slice) { + const size_t row_offset = rect.offset(0, 0, slice); + amd::Coord3D new_origin(row_offset, origin[1], origin[2]); + ret_val |= fillBuffer2D(memory, pattern, patternSize, surface, new_origin, size, entire, + forceBlit); + } + return ret_val; + } +} + +// ================================================================================================ +bool KernelBlitManager::fillBuffer1D(device::Memory& memory, const void* pattern, size_t patternSize, + const amd::Coord3D& surface, const amd::Coord3D& origin, + const amd::Coord3D& size, bool entire, bool forceBlit) const { amd::ScopedLock k(lockXferOps_); bool result = false; @@ -1950,7 +1977,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); - result = HostBlitManager::fillBuffer(memory, pattern, patternSize, origin, size, entire); + result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire); synchronize(); return result; } else { @@ -2044,6 +2071,112 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, return result; } +// ================================================================================================ +bool KernelBlitManager::fillBuffer2D(device::Memory& memory, const void* pattern, + size_t patternSize, const amd::Coord3D& surface, + const amd::Coord3D& origin, const amd::Coord3D& size, + bool entire, bool forceBlit) const { + + amd::ScopedLock k(lockXferOps_); + bool result = false; + + // Use host fill if memory has direct access + if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) { + // Stall GPU before CPU access + gpu().releaseGpuMemoryFence(); + result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire); + synchronize(); + return result; + } else { + uint fillType = FillBufferAligned2D; + uint64_t fillSizeX = size[0]/patternSize; + uint64_t fillSizeY = size[1]/patternSize; + + size_t globalWorkOffset[3] = {0, 0, 0}; + size_t globalWorkSize[3] = {amd::alignUp(fillSizeX, 16), + amd::alignUp(fillSizeY, 16), 1}; + size_t localWorkSize [3] = {16, 16, 1}; + + uint32_t alignment = (patternSize & 0x7) == 0 ? + sizeof(uint64_t) : + (patternSize & 0x3) == 0 ? + sizeof(uint32_t) : + (patternSize & 0x1) == 0 ? + sizeof(uint16_t) : sizeof(uint8_t); + + cl_mem mem = as_cl(memory.owner()); + if (alignment == sizeof(uint64_t)) { + setArgument(kernels_[fillType], 0, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 1, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 2, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 3, sizeof(cl_mem), &mem); + } else if (alignment == sizeof(uint32_t)) { + setArgument(kernels_[fillType], 0, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 1, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 2, sizeof(cl_mem), &mem); + setArgument(kernels_[fillType], 3, sizeof(cl_mem), nullptr); + } else if (alignment == sizeof(uint16_t)) { + setArgument(kernels_[fillType], 0, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 1, sizeof(cl_mem), &mem); + setArgument(kernels_[fillType], 2, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 3, sizeof(cl_mem), nullptr); + } else { + setArgument(kernels_[fillType], 0, sizeof(cl_mem), &mem); + setArgument(kernels_[fillType], 1, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 2, sizeof(cl_mem), nullptr); + setArgument(kernels_[fillType], 3, sizeof(cl_mem), nullptr); + } + + Memory* gpuCB = dev().getRocMemory(constantBuffer_); + if (gpuCB == nullptr) { + return false; + } + + // Find offset in the current constant buffer to allow multipel fills + uint32_t constBufOffset = ConstantBufferOffset(); + auto constBuf = reinterpret_cast
(constantBuffer_->getHostMem()) + constBufOffset; + memcpy(constBuf, pattern, patternSize); + + mem = as_cl(gpuCB->owner()); + setArgument(kernels_[fillType], 4, sizeof(cl_mem), &mem, constBufOffset); + + uint64_t mem_origin = static_cast(origin[0]); + uint64_t width = static_cast(size[0]); + uint64_t height = static_cast(size[1]); + uint64_t pitch = static_cast(surface[0]); + + patternSize/= alignment; + mem_origin /= alignment; + + setArgument(kernels_[fillType], 5, sizeof(uint32_t), &patternSize); + setArgument(kernels_[fillType], 6, sizeof(mem_origin), &mem_origin); + setArgument(kernels_[fillType], 7, sizeof(width), &width); + setArgument(kernels_[fillType], 8, sizeof(height), &height); + setArgument(kernels_[fillType], 9, sizeof(pitch), &pitch); + + + // Create ND range object for the kernel's execution + amd::NDRangeContainer ndrange(2, globalWorkOffset, globalWorkSize, localWorkSize); + + // Execute the blit + address parameters = captureArguments(kernels_[fillType]); + result = gpu().submitKernelInternal(ndrange, *kernels_[fillType], parameters, nullptr); + releaseArguments(parameters); + } + + synchronize(); + + return result; +} + +// ================================================================================================ +bool KernelBlitManager::fillBuffer3D(device::Memory& memory, const void* pattern, + size_t patternSize, const amd::Coord3D& surface, + const amd::Coord3D& origin, const amd::Coord3D& size, + bool entire, bool forceBlit) const { + ShouldNotReachHere(); + return false; +} // ================================================================================================ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory, const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin, diff --git a/projects/clr/rocclr/device/rocm/rocblit.hpp b/projects/clr/rocclr/device/rocm/rocblit.hpp index acbf3611d5..f63c9b2da2 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.hpp +++ b/projects/clr/rocclr/device/rocm/rocblit.hpp @@ -231,6 +231,7 @@ class KernelBlitManager : public DmaBlitManager { public: enum { FillBufferAligned = 0, + FillBufferAligned2D, BlitCopyBuffer, BlitCopyBufferAligned, BlitCopyBufferRect, @@ -366,12 +367,47 @@ class KernelBlitManager : public DmaBlitManager { virtual bool fillBuffer(device::Memory& memory, //!< Memory object to fill with pattern const void* pattern, //!< Pattern data size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. const amd::Coord3D& origin, //!< Destination origin const amd::Coord3D& size, //!< Size of the fill region bool entire = false, //!< Entire buffer will be updated bool forceBlit = false //!< Force GPU Blit for fill ) const; + //! Fills a buffer memory with a pattern data + virtual bool fillBuffer1D(device::Memory& memory, //!< Memory object to fill with pattern + const void* pattern, //!< Pattern data + size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. + const amd::Coord3D& origin, //!< Destination origin + const amd::Coord3D& size, //!< Size of the fill region + bool entire = false, //!< Entire buffer will be updated + bool forceBlit = false //!< Force GPU Blit for fill + ) const; + + //! Fills a buffer memory with a pattern data + virtual bool fillBuffer2D(device::Memory& memory, //!< Memory object to fill with pattern + const void* pattern, //!< Pattern data + size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. + const amd::Coord3D& origin, //!< Destination origin + const amd::Coord3D& size, //!< Size of the fill region + bool entire = false, //!< Entire buffer will be updated + bool forceBlit = false //!< Force GPU Blit for fill + ) const; + + //! Fills a buffer memory with a pattern data + virtual bool fillBuffer3D(device::Memory& memory, //!< Memory object to fill with pattern + const void* pattern, //!< Pattern data + size_t patternSize, //!< Pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. + const amd::Coord3D& origin, //!< Destination origin + const amd::Coord3D& size, //!< Size of the fill region + bool entire = false, //!< Entire buffer will be updated + bool forceBlit = false //!< Force GPU Blit for fill + ) const; + + //! Fills an image memory with a pattern data virtual bool fillImage(device::Memory& dstMemory, //!< Memory object to fill with pattern const void* pattern, //!< Pattern data @@ -483,11 +519,12 @@ class KernelBlitManager : public DmaBlitManager { }; static const char* BlitName[KernelBlitManager::BlitTotal] = { - "__amd_rocclr_fillBufferAligned", "__amd_rocclr_copyBuffer", "__amd_rocclr_copyBufferAligned", - "__amd_rocclr_copyBufferRect", "__amd_rocclr_copyBufferRectAligned", - "__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait", "__amd_rocclr_scheduler", - "__amd_rocclr_gwsInit", "__amd_rocclr_fillImage", "__amd_rocclr_copyImage", - "__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage", + "__amd_rocclr_fillBufferAligned", "__amd_rocclr_fillBufferAligned2D", "__amd_rocclr_copyBuffer", + "__amd_rocclr_copyBufferAligned", "__amd_rocclr_copyBufferRect", + "__amd_rocclr_copyBufferRectAligned", "__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait", + "__amd_rocclr_scheduler", "__amd_rocclr_gwsInit", "__amd_rocclr_fillImage", + "__amd_rocclr_copyImage", "__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer", + "__amd_rocclr_copyBufferToImage" }; inline void KernelBlitManager::setArgument(amd::Kernel* kernel, size_t index, diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index ba9ace1372..1ecb09ea58 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -2211,8 +2211,9 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& cmd) { } bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern, - size_t patternSize, const amd::Coord3D& origin, - const amd::Coord3D& size, bool forceBlit) { + size_t patternSize, const amd::Coord3D& surface, + const amd::Coord3D& origin, const amd::Coord3D& size, + bool forceBlit) { // Make sure VirtualGPU has an exclusive access to the resources amd::ScopedLock lock(execution()); @@ -2238,8 +2239,9 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const switch (type) { case CL_COMMAND_SVM_MEMFILL: case CL_COMMAND_FILL_BUFFER: { - amd::Coord3D realOrigin(origin[0]); - amd::Coord3D realSize(size[0]); + amd::Coord3D realSurf(surface[0], surface[1], surface[2]); + amd::Coord3D realOrigin(origin[0], origin[1], origin[2]); + amd::Coord3D realSize(size[0], size[1], size[2]); // Reprogram fill parameters if it's an IMAGE1D_BUFFER object if (imageBuffer) { size_t elemSize = amdMemory->asImage()->getImageFormat().getElementSize(); @@ -2250,7 +2252,8 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const pattern = fillValue; patternSize = elemSize; } - result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, entire, forceBlit); + result = blitMgr().fillBuffer(*memory, pattern, patternSize, realSurf, realOrigin, + realSize, entire, forceBlit); break; } case CL_COMMAND_FILL_IMAGE: { @@ -2275,30 +2278,9 @@ void VirtualGPU::submitFillMemory(amd::FillMemoryCommand& cmd) { amd::ScopedLock lock(execution()); profilingBegin(cmd); - if (cmd.type() == CL_COMMAND_FILL_IMAGE) { - if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(), - cmd.origin(), cmd.size())) { - cmd.setStatus(CL_INVALID_OPERATION); - } - } else { - size_t width = cmd.size().c[0]; - size_t height = cmd.size().c[1]; - size_t depth = cmd.size().c[2]; - size_t pitch = cmd.surface().c[0]; - amd::Coord3D origin = cmd.origin(); - amd::Coord3D region{cmd.surface().c[1], cmd.surface().c[2], depth}; - amd::BufferRect rect; - rect.create(static_cast(origin), static_cast(region), - pitch, 0); - for (size_t slice = 0; slice < depth; slice++) { - for (size_t row = 0; row < height; row++) { - const size_t rowOffset = rect.offset(0, row, slice); - if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(), - amd::Coord3D{rowOffset, 0, 0}, amd::Coord3D{width, 1, 1})) { - cmd.setStatus(CL_INVALID_OPERATION); - } - } - } + if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(), + cmd.surface(), cmd.origin(), cmd.size())) { + cmd.setStatus(CL_INVALID_OPERATION); } profilingEnd(cmd); } @@ -2636,7 +2618,7 @@ bool VirtualGPU::createVirtualQueue(uint deviceQueueSize) amd::Coord3D origin(0, 0, 0); amd::Coord3D region(virtualQueue_->getSize(), 0, 0); - if (!dev().xferMgr().fillBuffer(*vqMem, &pattern, sizeof(pattern), origin, region)) { + if (!dev().xferMgr().fillBuffer(*vqMem, &pattern, sizeof(pattern), region, origin, region)) { return false; } diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 512f123bf5..5ee467de2e 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -436,6 +436,7 @@ class VirtualGPU : public device::VirtualDevice { amd::Memory* amdMemory, //!< memory object to fill const void* pattern, //!< pattern to fill the memory size_t patternSize, //!< pattern size + const amd::Coord3D& surface, //!< Whole Surface of mem object. const amd::Coord3D& origin, //!< memory origin const amd::Coord3D& size, //!< memory size for filling bool forceBlit = false //!< force shader blit path