SWDEV-305527 - Changes to handle memset blit kernel that takes width, height and depth. This also fixes SWDEV-317261.
Change-Id: Ic85f63a95d9d8f48884fc8c7fd95cbb496dfbbca
[ROCm/clr commit: 7fb80a027a]
Этот коммит содержится в:
коммит произвёл
Karthik Jayaprakash
родитель
8fb4e3478a
Коммит
31c0525344
@@ -553,8 +553,8 @@ bool HostBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMe
|
||||
}
|
||||
|
||||
bool HostBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size, bool entire,
|
||||
bool forceBlit) const {
|
||||
const amd::Coord3D& surface, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, bool entire, bool forceBlit) const {
|
||||
// Map memory
|
||||
void* fillMem = memory.cpuMap(vDev_, (entire) ? Memory::CpuWriteOnly : 0);
|
||||
if (fillMem == NULL) {
|
||||
|
||||
@@ -178,6 +178,7 @@ class BlitManager : public amd::HeapObject {
|
||||
virtual bool fillBuffer(Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
@@ -331,6 +332,7 @@ class HostBlitManager : public device::BlitManager {
|
||||
virtual bool fillBuffer(device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
|
||||
@@ -27,6 +27,10 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
|
||||
extern void __amd_fillBufferAligned(__global uchar*, __global ushort*, __global uint*,
|
||||
__global ulong*, __constant uchar*, uint, ulong, ulong);
|
||||
|
||||
extern void __amd_fillBufferAligned2D(__global uchar*, __global ushort*, __global uint*,
|
||||
__global ulong*, __constant uchar*, uint, ulong, ulong,
|
||||
ulong, ulong);
|
||||
|
||||
extern void __amd_copyBuffer(__global uchar*, __global uchar*, ulong, ulong, ulong, uint);
|
||||
|
||||
extern void __amd_copyBufferAligned(__global uint*, __global uint*, ulong, ulong, ulong, uint);
|
||||
@@ -47,6 +51,21 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
|
||||
pattern, patternSize, offset, size);
|
||||
}
|
||||
|
||||
__kernel void __amd_rocclr_fillBufferAligned2D(__global uchar* bufUChar,
|
||||
__global ushort* bufUShort,
|
||||
__global uint* bufUInt,
|
||||
__global ulong* bufULong,
|
||||
__constant uchar* pattern,
|
||||
uint patternSize,
|
||||
ulong offset,
|
||||
ulong width,
|
||||
ulong height,
|
||||
ulong pitch) {
|
||||
__amd_fillBufferAligned2D(bufUChar, bufUShort, bufUInt, bufULong,
|
||||
pattern, patternSize, offset, width, height,
|
||||
pitch);
|
||||
}
|
||||
|
||||
__kernel void __amd_rocclr_copyBuffer(__global uchar* srcI, __global uchar* dstI,
|
||||
ulong srcOrigin, ulong dstOrigin, ulong size,
|
||||
uint remain) {
|
||||
|
||||
@@ -2101,8 +2101,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
}
|
||||
|
||||
bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire, bool forceBlit) const {
|
||||
const amd::Coord3D& surface, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, bool entire, bool forceBlit) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -2110,7 +2110,8 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
if (setup_.disableFillBuffer_ || (!forceBlit && gpuMem(memory).isHostMemDirectAccess())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, origin, size, entire);
|
||||
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size,
|
||||
entire);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
|
||||
@@ -344,6 +344,7 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
virtual bool fillBuffer(device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
|
||||
@@ -117,8 +117,8 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
|
||||
if ((cpuAccess_ == nullptr) && zero && !prog.isInternal()) {
|
||||
uint64_t pattern = 0;
|
||||
size_t patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1;
|
||||
prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
|
||||
amd::Coord3D(size));
|
||||
prog.palDevice().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(size),
|
||||
amd::Coord3D(0), amd::Coord3D(size));
|
||||
}
|
||||
|
||||
switch (segment) {
|
||||
|
||||
@@ -810,7 +810,8 @@ bool VirtualGPU::createVirtualQueue(uint deviceQueueSize) {
|
||||
uint64_t pattern = 0;
|
||||
amd::Coord3D origin(0, 0, 0);
|
||||
amd::Coord3D region(virtualQueue_->size(), 0, 0);
|
||||
if (!dev().xferMgr().fillBuffer(*virtualQueue_, &pattern, sizeof(pattern), origin, region)) {
|
||||
if (!dev().xferMgr().fillBuffer(*virtualQueue_, &pattern, sizeof(pattern), region, origin,
|
||||
region)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1826,7 +1827,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
|
||||
pattern = fillValue;
|
||||
patternSize = elemSize;
|
||||
}
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize,
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realSize, realOrigin, realSize,
|
||||
amdMemory->isEntirelyCovered(origin, size), forceBlit);
|
||||
if (nullptr != bufferFromImage) {
|
||||
bufferFromImage->release();
|
||||
|
||||
@@ -1941,8 +1941,35 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern, size_t patternSize,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire, bool forceBlit) const {
|
||||
const amd::Coord3D& surface, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, bool entire, bool forceBlit) const {
|
||||
|
||||
guarantee(size[0] > 0 && size[1] > 0 && size[2] > 0, "Dimension cannot be 0");
|
||||
|
||||
if (size[1] == 1 && size[2] == 1) {
|
||||
return fillBuffer1D(memory, pattern, patternSize, surface, origin, size, entire, forceBlit);
|
||||
} else if (size[2] == 1) {
|
||||
return fillBuffer2D(memory, pattern, patternSize, surface, origin, size, entire, forceBlit);
|
||||
} else {
|
||||
bool ret_val = true;
|
||||
amd::Coord3D my_origin(origin);
|
||||
amd::Coord3D my_region{surface[1], surface[2], size[2]};
|
||||
amd::BufferRect rect;
|
||||
rect.create(static_cast<size_t*>(my_origin), static_cast<size_t*>(my_region), surface[0], 0);
|
||||
for (size_t slice = 0; slice < size[2]; ++slice) {
|
||||
const size_t row_offset = rect.offset(0, 0, slice);
|
||||
amd::Coord3D new_origin(row_offset, origin[1], origin[2]);
|
||||
ret_val |= fillBuffer2D(memory, pattern, patternSize, surface, new_origin, size, entire,
|
||||
forceBlit);
|
||||
}
|
||||
return ret_val;
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::fillBuffer1D(device::Memory& memory, const void* pattern, size_t patternSize,
|
||||
const amd::Coord3D& surface, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, bool entire, bool forceBlit) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
@@ -1950,7 +1977,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, origin, size, entire);
|
||||
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
@@ -2044,6 +2071,112 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
return result;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::fillBuffer2D(device::Memory& memory, const void* pattern,
|
||||
size_t patternSize, const amd::Coord3D& surface,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire, bool forceBlit) const {
|
||||
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
// Use host fill if memory has direct access
|
||||
if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) {
|
||||
// Stall GPU before CPU access
|
||||
gpu().releaseGpuMemoryFence();
|
||||
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
} else {
|
||||
uint fillType = FillBufferAligned2D;
|
||||
uint64_t fillSizeX = size[0]/patternSize;
|
||||
uint64_t fillSizeY = size[1]/patternSize;
|
||||
|
||||
size_t globalWorkOffset[3] = {0, 0, 0};
|
||||
size_t globalWorkSize[3] = {amd::alignUp(fillSizeX, 16),
|
||||
amd::alignUp(fillSizeY, 16), 1};
|
||||
size_t localWorkSize [3] = {16, 16, 1};
|
||||
|
||||
uint32_t alignment = (patternSize & 0x7) == 0 ?
|
||||
sizeof(uint64_t) :
|
||||
(patternSize & 0x3) == 0 ?
|
||||
sizeof(uint32_t) :
|
||||
(patternSize & 0x1) == 0 ?
|
||||
sizeof(uint16_t) : sizeof(uint8_t);
|
||||
|
||||
cl_mem mem = as_cl<amd::Memory>(memory.owner());
|
||||
if (alignment == sizeof(uint64_t)) {
|
||||
setArgument(kernels_[fillType], 0, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 1, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 3, sizeof(cl_mem), &mem);
|
||||
} else if (alignment == sizeof(uint32_t)) {
|
||||
setArgument(kernels_[fillType], 0, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 1, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &mem);
|
||||
setArgument(kernels_[fillType], 3, sizeof(cl_mem), nullptr);
|
||||
} else if (alignment == sizeof(uint16_t)) {
|
||||
setArgument(kernels_[fillType], 0, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 1, sizeof(cl_mem), &mem);
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 3, sizeof(cl_mem), nullptr);
|
||||
} else {
|
||||
setArgument(kernels_[fillType], 0, sizeof(cl_mem), &mem);
|
||||
setArgument(kernels_[fillType], 1, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[fillType], 3, sizeof(cl_mem), nullptr);
|
||||
}
|
||||
|
||||
Memory* gpuCB = dev().getRocMemory(constantBuffer_);
|
||||
if (gpuCB == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find offset in the current constant buffer to allow multipel fills
|
||||
uint32_t constBufOffset = ConstantBufferOffset();
|
||||
auto constBuf = reinterpret_cast<address>(constantBuffer_->getHostMem()) + constBufOffset;
|
||||
memcpy(constBuf, pattern, patternSize);
|
||||
|
||||
mem = as_cl<amd::Memory>(gpuCB->owner());
|
||||
setArgument(kernels_[fillType], 4, sizeof(cl_mem), &mem, constBufOffset);
|
||||
|
||||
uint64_t mem_origin = static_cast<uint64_t>(origin[0]);
|
||||
uint64_t width = static_cast<uint64_t>(size[0]);
|
||||
uint64_t height = static_cast<uint64_t>(size[1]);
|
||||
uint64_t pitch = static_cast<uint64_t>(surface[0]);
|
||||
|
||||
patternSize/= alignment;
|
||||
mem_origin /= alignment;
|
||||
|
||||
setArgument(kernels_[fillType], 5, sizeof(uint32_t), &patternSize);
|
||||
setArgument(kernels_[fillType], 6, sizeof(mem_origin), &mem_origin);
|
||||
setArgument(kernels_[fillType], 7, sizeof(width), &width);
|
||||
setArgument(kernels_[fillType], 8, sizeof(height), &height);
|
||||
setArgument(kernels_[fillType], 9, sizeof(pitch), &pitch);
|
||||
|
||||
|
||||
// Create ND range object for the kernel's execution
|
||||
amd::NDRangeContainer ndrange(2, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
|
||||
// Execute the blit
|
||||
address parameters = captureArguments(kernels_[fillType]);
|
||||
result = gpu().submitKernelInternal(ndrange, *kernels_[fillType], parameters, nullptr);
|
||||
releaseArguments(parameters);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::fillBuffer3D(device::Memory& memory, const void* pattern,
|
||||
size_t patternSize, const amd::Coord3D& surface,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire, bool forceBlit) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
// ================================================================================================
|
||||
bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
|
||||
@@ -231,6 +231,7 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
public:
|
||||
enum {
|
||||
FillBufferAligned = 0,
|
||||
FillBufferAligned2D,
|
||||
BlitCopyBuffer,
|
||||
BlitCopyBufferAligned,
|
||||
BlitCopyBufferRect,
|
||||
@@ -366,12 +367,47 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
virtual bool fillBuffer(device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
bool forceBlit = false //!< Force GPU Blit for fill
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer1D(device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
bool forceBlit = false //!< Force GPU Blit for fill
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer2D(device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
bool forceBlit = false //!< Force GPU Blit for fill
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer3D(device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the fill region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
bool forceBlit = false //!< Force GPU Blit for fill
|
||||
) const;
|
||||
|
||||
|
||||
//! Fills an image memory with a pattern data
|
||||
virtual bool fillImage(device::Memory& dstMemory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
@@ -483,11 +519,12 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
};
|
||||
|
||||
static const char* BlitName[KernelBlitManager::BlitTotal] = {
|
||||
"__amd_rocclr_fillBufferAligned", "__amd_rocclr_copyBuffer", "__amd_rocclr_copyBufferAligned",
|
||||
"__amd_rocclr_copyBufferRect", "__amd_rocclr_copyBufferRectAligned",
|
||||
"__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait", "__amd_rocclr_scheduler",
|
||||
"__amd_rocclr_gwsInit", "__amd_rocclr_fillImage", "__amd_rocclr_copyImage",
|
||||
"__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage",
|
||||
"__amd_rocclr_fillBufferAligned", "__amd_rocclr_fillBufferAligned2D", "__amd_rocclr_copyBuffer",
|
||||
"__amd_rocclr_copyBufferAligned", "__amd_rocclr_copyBufferRect",
|
||||
"__amd_rocclr_copyBufferRectAligned", "__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait",
|
||||
"__amd_rocclr_scheduler", "__amd_rocclr_gwsInit", "__amd_rocclr_fillImage",
|
||||
"__amd_rocclr_copyImage", "__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer",
|
||||
"__amd_rocclr_copyBufferToImage"
|
||||
};
|
||||
|
||||
inline void KernelBlitManager::setArgument(amd::Kernel* kernel, size_t index,
|
||||
|
||||
@@ -2211,8 +2211,9 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& cmd) {
|
||||
}
|
||||
|
||||
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
|
||||
size_t patternSize, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, bool forceBlit) {
|
||||
size_t patternSize, const amd::Coord3D& surface,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool forceBlit) {
|
||||
// Make sure VirtualGPU has an exclusive access to the resources
|
||||
amd::ScopedLock lock(execution());
|
||||
|
||||
@@ -2238,8 +2239,9 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
|
||||
switch (type) {
|
||||
case CL_COMMAND_SVM_MEMFILL:
|
||||
case CL_COMMAND_FILL_BUFFER: {
|
||||
amd::Coord3D realOrigin(origin[0]);
|
||||
amd::Coord3D realSize(size[0]);
|
||||
amd::Coord3D realSurf(surface[0], surface[1], surface[2]);
|
||||
amd::Coord3D realOrigin(origin[0], origin[1], origin[2]);
|
||||
amd::Coord3D realSize(size[0], size[1], size[2]);
|
||||
// Reprogram fill parameters if it's an IMAGE1D_BUFFER object
|
||||
if (imageBuffer) {
|
||||
size_t elemSize = amdMemory->asImage()->getImageFormat().getElementSize();
|
||||
@@ -2250,7 +2252,8 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
|
||||
pattern = fillValue;
|
||||
patternSize = elemSize;
|
||||
}
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, entire, forceBlit);
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realSurf, realOrigin,
|
||||
realSize, entire, forceBlit);
|
||||
break;
|
||||
}
|
||||
case CL_COMMAND_FILL_IMAGE: {
|
||||
@@ -2275,30 +2278,9 @@ void VirtualGPU::submitFillMemory(amd::FillMemoryCommand& cmd) {
|
||||
amd::ScopedLock lock(execution());
|
||||
|
||||
profilingBegin(cmd);
|
||||
if (cmd.type() == CL_COMMAND_FILL_IMAGE) {
|
||||
if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(),
|
||||
cmd.origin(), cmd.size())) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
} else {
|
||||
size_t width = cmd.size().c[0];
|
||||
size_t height = cmd.size().c[1];
|
||||
size_t depth = cmd.size().c[2];
|
||||
size_t pitch = cmd.surface().c[0];
|
||||
amd::Coord3D origin = cmd.origin();
|
||||
amd::Coord3D region{cmd.surface().c[1], cmd.surface().c[2], depth};
|
||||
amd::BufferRect rect;
|
||||
rect.create(static_cast<size_t*>(origin), static_cast<size_t*>(region),
|
||||
pitch, 0);
|
||||
for (size_t slice = 0; slice < depth; slice++) {
|
||||
for (size_t row = 0; row < height; row++) {
|
||||
const size_t rowOffset = rect.offset(0, row, slice);
|
||||
if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(),
|
||||
amd::Coord3D{rowOffset, 0, 0}, amd::Coord3D{width, 1, 1})) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!fillMemory(cmd.type(), &cmd.memory(), cmd.pattern(), cmd.patternSize(),
|
||||
cmd.surface(), cmd.origin(), cmd.size())) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
profilingEnd(cmd);
|
||||
}
|
||||
@@ -2636,7 +2618,7 @@ bool VirtualGPU::createVirtualQueue(uint deviceQueueSize)
|
||||
amd::Coord3D origin(0, 0, 0);
|
||||
amd::Coord3D region(virtualQueue_->getSize(), 0, 0);
|
||||
|
||||
if (!dev().xferMgr().fillBuffer(*vqMem, &pattern, sizeof(pattern), origin, region)) {
|
||||
if (!dev().xferMgr().fillBuffer(*vqMem, &pattern, sizeof(pattern), region, origin, region)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -436,6 +436,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
amd::Memory* amdMemory, //!< memory object to fill
|
||||
const void* pattern, //!< pattern to fill the memory
|
||||
size_t patternSize, //!< pattern size
|
||||
const amd::Coord3D& surface, //!< Whole Surface of mem object.
|
||||
const amd::Coord3D& origin, //!< memory origin
|
||||
const amd::Coord3D& size, //!< memory size for filling
|
||||
bool forceBlit = false //!< force shader blit path
|
||||
|
||||
Ссылка в новой задаче
Block a user