SWDEV-297448 - Refactor streamOperations code
Reuse the fillMemory function, which should fix the cache syncs from the host.
Change-Id: Ieebec5fc3ed3a322b88d5187c8dca4805ec6f84b
[ROCm/clr commit: 24442be35a]
Este commit está contenido en:
@@ -1243,7 +1243,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
|
||||
|
||||
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
|
||||
size_t patternSize, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size) {
|
||||
const amd::Coord3D& size, bool forceBlit) {
|
||||
gpu::Memory* memory = dev().getGpuMemory(amdMemory);
|
||||
bool entire = amdMemory->isEntirelyCovered(origin, size);
|
||||
|
||||
@@ -1284,7 +1284,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
|
||||
patternSize = elemSize;
|
||||
}
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize,
|
||||
amdMemory->isEntirelyCovered(origin, size));
|
||||
amdMemory->isEntirelyCovered(origin, size), forceBlit);
|
||||
if (NULL != bufferFromImage) {
|
||||
bufferFromImage->release();
|
||||
}
|
||||
|
||||
@@ -460,6 +460,7 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext {
|
||||
size_t patternSize, //!< pattern size
|
||||
const amd::Coord3D& origin, //!< memory origin
|
||||
const amd::Coord3D& size //!< memory size for filling
|
||||
bool forceBlit = false //!< force shader blit path
|
||||
);
|
||||
|
||||
bool copyMemory(cl_command_type type, //!< the command type
|
||||
|
||||
@@ -1791,7 +1791,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
|
||||
|
||||
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
|
||||
size_t patternSize, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size) {
|
||||
const amd::Coord3D& size, bool forceBlit) {
|
||||
pal::Memory* memory = dev().getGpuMemory(amdMemory);
|
||||
bool entire = amdMemory->isEntirelyCovered(origin, size);
|
||||
|
||||
@@ -1832,7 +1832,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
|
||||
patternSize = elemSize;
|
||||
}
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize,
|
||||
amdMemory->isEntirelyCovered(origin, size));
|
||||
amdMemory->isEntirelyCovered(origin, size), forceBlit);
|
||||
if (nullptr != bufferFromImage) {
|
||||
bufferFromImage->release();
|
||||
}
|
||||
@@ -2052,16 +2052,10 @@ void VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& vcmd) {
|
||||
amd::Coord3D size(fillSize, 1, 1);
|
||||
|
||||
assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");
|
||||
// Synchronize memory from host if necessary
|
||||
device::Memory::SyncFlags syncFlags;
|
||||
syncFlags.skipEntire_ = dstMemory->isEntirelyCovered(origin, size);
|
||||
memory->syncCacheFromHost(*this, syncFlags);
|
||||
|
||||
if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(), vcmd.patternSize(), origin, size)) {
|
||||
vcmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
// Mark this as the most-recently written cache of the destination
|
||||
dstMemory->signalWrite(&gpuDevice_);
|
||||
} else {
|
||||
// for FGS capable device, fill CPU memory directly
|
||||
amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(), vcmd.times());
|
||||
|
||||
@@ -607,7 +607,8 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
const void* pattern, //!< pattern to fill the memory
|
||||
size_t patternSize, //!< pattern size
|
||||
const amd::Coord3D& origin, //!< memory origin
|
||||
const amd::Coord3D& size //!< memory size for filling
|
||||
const amd::Coord3D& size, //!< memory size for filling
|
||||
bool forceBlit = false //!< force shader blit path
|
||||
);
|
||||
|
||||
bool copyMemory(cl_command_type type, //!< the command type
|
||||
|
||||
@@ -2147,7 +2147,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& cmd) {
|
||||
|
||||
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
|
||||
size_t patternSize, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size) {
|
||||
const amd::Coord3D& size, bool forceBlit) {
|
||||
// Make sure VirtualGPU has an exclusive access to the resources
|
||||
amd::ScopedLock lock(execution());
|
||||
|
||||
@@ -2185,7 +2185,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
|
||||
pattern = fillValue;
|
||||
patternSize = elemSize;
|
||||
}
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, entire);
|
||||
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, entire, forceBlit);
|
||||
break;
|
||||
}
|
||||
case CL_COMMAND_FILL_IMAGE: {
|
||||
@@ -2307,18 +2307,14 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
|
||||
} else if (type == ROCCLR_COMMAND_STREAM_WRITE_VALUE) {
|
||||
amd::Coord3D origin(offset);
|
||||
amd::Coord3D size(sizeBytes);
|
||||
bool entire = amdMemory->isEntirelyCovered(origin, size);
|
||||
|
||||
// Ensure memory ordering preceding the write
|
||||
dispatchBarrierPacket(kBarrierPacketReleaseHeader);
|
||||
|
||||
// Use GPU Blit to write
|
||||
bool result = blitMgr().fillBuffer(*memory, &value, sizeBytes, origin, size, entire, true);
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writting value: 0x%lx", value);
|
||||
|
||||
if (!result) {
|
||||
LogError("submitStreamOperation: Write failed!");
|
||||
if (!fillMemory(CL_COMMAND_FILL_BUFFER, amdMemory, &value, sizeBytes, origin, size, true)) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writing value: 0x%lx", value);
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
@@ -2348,16 +2344,10 @@ void VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd) {
|
||||
amd::Coord3D size(fillSize, 1, 1);
|
||||
|
||||
assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");
|
||||
// Synchronize memory from host if necessary
|
||||
device::Memory::SyncFlags syncFlags;
|
||||
syncFlags.skipEntire_ = dstMemory->isEntirelyCovered(origin, size);
|
||||
memory->syncCacheFromHost(*this, syncFlags);
|
||||
|
||||
if (!fillMemory(cmd.type(), dstMemory, cmd.pattern(), cmd.patternSize(), origin, size)) {
|
||||
if (!fillMemory(cmd.type(), dstMemory, cmd.pattern(), cmd.patternSize(), origin, size, true)) {
|
||||
cmd.setStatus(CL_INVALID_OPERATION);
|
||||
}
|
||||
// Mark this as the most-recently written cache of the destination
|
||||
dstMemory->signalWrite(&dev());
|
||||
} else {
|
||||
// Stall GPU for CPU access to memory
|
||||
releaseGpuMemoryFence();
|
||||
|
||||
@@ -415,7 +415,8 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
const void* pattern, //!< pattern to fill the memory
|
||||
size_t patternSize, //!< pattern size
|
||||
const amd::Coord3D& origin, //!< memory origin
|
||||
const amd::Coord3D& size //!< memory size for filling
|
||||
const amd::Coord3D& size, //!< memory size for filling
|
||||
bool forceBlit = false //!< force shader blit path
|
||||
);
|
||||
|
||||
//! Common function for memory copy used by both svm Copy and non-svm Copy
|
||||
|
||||
Referencia en una nueva incidencia
Block a user