SWDEV-347670 - StreamWait and StreamWrite on Windows
__amd_streamOpsWrite blitkernel in device-libs has only 3 args.
so getting rid of the 4th unused arg (sizeBytes)
Change-Id: I81cc1107f8b424bf58558c93a2495a1b878aef91
[ROCm/clr commit: e643406caa]
Este commit está contenido en:
@@ -39,7 +39,7 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
|
||||
|
||||
extern void __amd_copyBufferRectAligned(__global uint*, __global uint*, ulong4, ulong4, ulong4);
|
||||
|
||||
extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong);
|
||||
extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong);
|
||||
|
||||
extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong);
|
||||
|
||||
@@ -165,8 +165,8 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
|
||||
|
||||
const char* HipExtraSourceCode = BLIT_KERNELS(
|
||||
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
|
||||
ulong value, ulong sizeBytes) {
|
||||
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
|
||||
ulong value) {
|
||||
__amd_streamOpsWrite(ptrInt, ptrUlong, value);
|
||||
}
|
||||
|
||||
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
|
||||
@@ -186,8 +186,8 @@ const char* HipExtraSourceCode = BLIT_KERNELS(
|
||||
|
||||
const char* HipExtraSourceCodeNoGWS = BLIT_KERNELS(
|
||||
__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong,
|
||||
ulong value, ulong sizeBytes) {
|
||||
__amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes);
|
||||
ulong value) {
|
||||
__amd_streamOpsWrite(ptrInt, ptrUlong, value);
|
||||
}
|
||||
|
||||
__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong,
|
||||
|
||||
@@ -2520,13 +2520,11 @@ bool KernelBlitManager::streamOpsWrite(device::Memory& memory, uint64_t value,
|
||||
if (is32BitWrite) {
|
||||
setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem, offset);
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint32_t), &value);
|
||||
} else {
|
||||
setArgument(kernels_[blitType], 0, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem, offset);
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value);
|
||||
}
|
||||
setArgument(kernels_[blitType], 3, sizeof(size_t), &sizeBytes);
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value);
|
||||
// Create ND range object for the kernel's execution
|
||||
amd::NDRangeContainer ndrange(dim, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
// Execute the blit
|
||||
@@ -2555,16 +2553,13 @@ bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, si
|
||||
if (is32BitWait) {
|
||||
setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem, offset);
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint32_t), &value);
|
||||
setArgument(kernels_[blitType], 3, sizeof(uint32_t), &flags);
|
||||
setArgument(kernels_[blitType], 4, sizeof(uint32_t), &mask);
|
||||
} else {
|
||||
setArgument(kernels_[blitType], 0, sizeof(cl_mem), nullptr);
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem, offset);
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value);
|
||||
setArgument(kernels_[blitType], 3, sizeof(uint64_t), &flags);
|
||||
setArgument(kernels_[blitType], 4, sizeof(uint64_t), &mask);
|
||||
}
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value);
|
||||
setArgument(kernels_[blitType], 3, sizeof(uint64_t), &flags);
|
||||
setArgument(kernels_[blitType], 4, sizeof(uint64_t), &mask);
|
||||
|
||||
// Create ND range object for the kernel's execution
|
||||
amd::NDRangeContainer ndrange(dim, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
|
||||
@@ -660,6 +660,8 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
|
||||
info_.cooperativeGroups_ = settings().enableCoopGroups_;
|
||||
info_.cooperativeMultiDeviceGroups_ = settings().enableCoopMultiDeviceGroups_;
|
||||
// Enable StreamWrite and StreamWait for all devices
|
||||
info_.aqlBarrierValue_ = true;
|
||||
|
||||
if (amd::IS_HIP) {
|
||||
info_.largeBar_ = false;
|
||||
|
||||
@@ -1061,14 +1061,17 @@ VirtualGPU::~VirtualGPU() {
|
||||
amd::ScopedLock k(dev().lockAsyncOps());
|
||||
amd::ScopedLock lock(dev().vgpusAccess());
|
||||
|
||||
// Clear all timestamps, associated with this virtual GPU
|
||||
auto& mgmt = *queues_[MainEngine]->aql_mgmt_;
|
||||
for (uint32_t i = 0; i < AqlPacketMgmt::kAqlPacketsListSize; ++i) {
|
||||
if (mgmt.aql_vgpus_[i] == this) {
|
||||
mgmt.aql_vgpus_[i] = nullptr;
|
||||
mgmt.aql_events_[i].invalidate();
|
||||
if (queues_[MainEngine] != nullptr) {
|
||||
// Clear all timestamps, associated with this virtual GPU
|
||||
auto& mgmt = *queues_[MainEngine]->aql_mgmt_;
|
||||
for (uint32_t i = 0; i < AqlPacketMgmt::kAqlPacketsListSize; ++i) {
|
||||
if (mgmt.aql_vgpus_[i] == this) {
|
||||
mgmt.aql_vgpus_[i] = nullptr;
|
||||
mgmt.aql_events_[i].invalidate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Destroy RGP trace
|
||||
if (rgpCaptureEna()) {
|
||||
dev().rgpCaptureMgr()->FinishRGPTrace(this, true);
|
||||
|
||||
@@ -2567,7 +2567,6 @@ bool KernelBlitManager::streamOpsWrite(device::Memory& memory, uint64_t value,
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem, offset);
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value);
|
||||
}
|
||||
setArgument(kernels_[blitType], 3, sizeof(size_t), &sizeBytes);
|
||||
// Create ND range object for the kernel's execution
|
||||
amd::NDRangeContainer ndrange(dim, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
// Execute the blit
|
||||
|
||||
Referencia en una nueva incidencia
Block a user