SWDEV-297448 - Refactor streamOperations code

Reuse the fillMemory function, which should fix the cache syncs from the host.

Change-Id: Ieebec5fc3ed3a322b88d5187c8dca4805ec6f84b


[ROCm/clr commit: 24442be35a]
Este commit está contenido en:
Saleel Kudchadker
2021-08-24 11:25:28 -07:00
padre f246b4a097
commit 8e13d117e4
Se han modificado 6 ficheros con 15 adiciones y 28 borrados
+2 -2
Ver fichero
@@ -1243,7 +1243,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
size_t patternSize, const amd::Coord3D& origin,
const amd::Coord3D& size) {
const amd::Coord3D& size, bool forceBlit) {
gpu::Memory* memory = dev().getGpuMemory(amdMemory);
bool entire = amdMemory->isEntirelyCovered(origin, size);
@@ -1284,7 +1284,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
patternSize = elemSize;
}
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize,
amdMemory->isEntirelyCovered(origin, size));
amdMemory->isEntirelyCovered(origin, size), forceBlit);
if (NULL != bufferFromImage) {
bufferFromImage->release();
}
@@ -460,6 +460,7 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext {
size_t patternSize, //!< pattern size
const amd::Coord3D& origin, //!< memory origin
const amd::Coord3D& size //!< memory size for filling
bool forceBlit = false //!< force shader blit path
);
bool copyMemory(cl_command_type type, //!< the command type
+2 -8
Ver fichero
@@ -1791,7 +1791,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
size_t patternSize, const amd::Coord3D& origin,
const amd::Coord3D& size) {
const amd::Coord3D& size, bool forceBlit) {
pal::Memory* memory = dev().getGpuMemory(amdMemory);
bool entire = amdMemory->isEntirelyCovered(origin, size);
@@ -1832,7 +1832,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
patternSize = elemSize;
}
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize,
amdMemory->isEntirelyCovered(origin, size));
amdMemory->isEntirelyCovered(origin, size), forceBlit);
if (nullptr != bufferFromImage) {
bufferFromImage->release();
}
@@ -2052,16 +2052,10 @@ void VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& vcmd) {
amd::Coord3D size(fillSize, 1, 1);
assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");
// Synchronize memory from host if necessary
device::Memory::SyncFlags syncFlags;
syncFlags.skipEntire_ = dstMemory->isEntirelyCovered(origin, size);
memory->syncCacheFromHost(*this, syncFlags);
if (!fillMemory(vcmd.type(), dstMemory, vcmd.pattern(), vcmd.patternSize(), origin, size)) {
vcmd.setStatus(CL_INVALID_OPERATION);
}
// Mark this as the most-recently written cache of the destination
dstMemory->signalWrite(&gpuDevice_);
} else {
// for FGS capable device, fill CPU memory directly
amd::SvmBuffer::memFill(vcmd.dst(), vcmd.pattern(), vcmd.patternSize(), vcmd.times());
+2 -1
Ver fichero
@@ -607,7 +607,8 @@ class VirtualGPU : public device::VirtualDevice {
const void* pattern, //!< pattern to fill the memory
size_t patternSize, //!< pattern size
const amd::Coord3D& origin, //!< memory origin
const amd::Coord3D& size //!< memory size for filling
const amd::Coord3D& size, //!< memory size for filling
bool forceBlit = false //!< force shader blit path
);
bool copyMemory(cl_command_type type, //!< the command type
+6 -16
Ver fichero
@@ -2147,7 +2147,7 @@ void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& cmd) {
bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const void* pattern,
size_t patternSize, const amd::Coord3D& origin,
const amd::Coord3D& size) {
const amd::Coord3D& size, bool forceBlit) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
@@ -2185,7 +2185,7 @@ bool VirtualGPU::fillMemory(cl_command_type type, amd::Memory* amdMemory, const
pattern = fillValue;
patternSize = elemSize;
}
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, entire);
result = blitMgr().fillBuffer(*memory, pattern, patternSize, realOrigin, realSize, entire, forceBlit);
break;
}
case CL_COMMAND_FILL_IMAGE: {
@@ -2307,18 +2307,14 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
} else if (type == ROCCLR_COMMAND_STREAM_WRITE_VALUE) {
amd::Coord3D origin(offset);
amd::Coord3D size(sizeBytes);
bool entire = amdMemory->isEntirelyCovered(origin, size);
// Ensure memory ordering preceding the write
dispatchBarrierPacket(kBarrierPacketReleaseHeader);
// Use GPU Blit to write
bool result = blitMgr().fillBuffer(*memory, &value, sizeBytes, origin, size, entire, true);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writting value: 0x%lx", value);
if (!result) {
LogError("submitStreamOperation: Write failed!");
if (!fillMemory(CL_COMMAND_FILL_BUFFER, amdMemory, &value, sizeBytes, origin, size, true)) {
cmd.setStatus(CL_INVALID_OPERATION);
}
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writing value: 0x%lx", value);
} else {
ShouldNotReachHere();
}
@@ -2348,16 +2344,10 @@ void VirtualGPU::submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd) {
amd::Coord3D size(fillSize, 1, 1);
assert((dstMemory->validateRegion(origin, size)) && "The incorrect fill size!");
// Synchronize memory from host if necessary
device::Memory::SyncFlags syncFlags;
syncFlags.skipEntire_ = dstMemory->isEntirelyCovered(origin, size);
memory->syncCacheFromHost(*this, syncFlags);
if (!fillMemory(cmd.type(), dstMemory, cmd.pattern(), cmd.patternSize(), origin, size)) {
if (!fillMemory(cmd.type(), dstMemory, cmd.pattern(), cmd.patternSize(), origin, size, true)) {
cmd.setStatus(CL_INVALID_OPERATION);
}
// Mark this as the most-recently written cache of the destination
dstMemory->signalWrite(&dev());
} else {
// Stall GPU for CPU access to memory
releaseGpuMemoryFence();
+2 -1
Ver fichero
@@ -415,7 +415,8 @@ class VirtualGPU : public device::VirtualDevice {
const void* pattern, //!< pattern to fill the memory
size_t patternSize, //!< pattern size
const amd::Coord3D& origin, //!< memory origin
const amd::Coord3D& size //!< memory size for filling
const amd::Coord3D& size, //!< memory size for filling
bool forceBlit = false //!< force shader blit path
);
//! Common function for memory copy used by both svm Copy and non-svm Copy