diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index d2a1afbd4a..780289d2ea 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -2461,7 +2461,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern, // ================================================================================================ bool KernelBlitManager::streamOpsWrite(device::Memory& memory, uint64_t value, - size_t sizeBytes) const { + size_t offset, size_t sizeBytes) const { amd::ScopedLock k(lockXferOps_); bool result = false; uint blitType = StreamOpsWrite; @@ -2474,12 +2474,12 @@ bool KernelBlitManager::streamOpsWrite(device::Memory& memory, uint64_t value, bool is32BitWrite = (sizeBytes == sizeof(uint32_t)) ? true : false; // Program kernels arguments for the write operation if (is32BitWrite) { - setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem); + setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem, offset); setArgument(kernels_[blitType], 1, sizeof(cl_mem), nullptr); setArgument(kernels_[blitType], 2, sizeof(uint32_t), &value); } else { setArgument(kernels_[blitType], 0, sizeof(cl_mem), nullptr); - setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem); + setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem, offset); setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value); } setArgument(kernels_[blitType], 3, sizeof(size_t), &sizeBytes); @@ -2494,8 +2494,8 @@ bool KernelBlitManager::streamOpsWrite(device::Memory& memory, uint64_t value, } // ================================================================================================ -bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, size_t sizeBytes, - uint64_t flags, uint64_t mask) const { +bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, size_t offset, + size_t sizeBytes, uint64_t flags, uint64_t mask) const { 
amd::ScopedLock k(lockXferOps_); bool result = false; uint blitType = StreamOpsWait; @@ -2510,14 +2510,14 @@ bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, si bool is32BitWait = (sizeBytes == sizeof(uint32_t)) ? true : false; // Program kernels arguments for the wait operation if (is32BitWait) { - setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem); + setArgument(kernels_[blitType], 0, sizeof(cl_mem), &mem, offset); setArgument(kernels_[blitType], 1, sizeof(cl_mem), nullptr); setArgument(kernels_[blitType], 2, sizeof(uint32_t), &value); setArgument(kernels_[blitType], 3, sizeof(uint32_t), &flags); setArgument(kernels_[blitType], 4, sizeof(uint32_t), &mask); } else { setArgument(kernels_[blitType], 0, sizeof(cl_mem), nullptr); - setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem); + setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem, offset); setArgument(kernels_[blitType], 2, sizeof(uint64_t), &value); setArgument(kernels_[blitType], 3, sizeof(uint64_t), &flags); setArgument(kernels_[blitType], 4, sizeof(uint64_t), &mask); diff --git a/projects/clr/rocclr/device/rocm/rocblit.hpp b/projects/clr/rocclr/device/rocm/rocblit.hpp index f63c9b2da2..aee3151845 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.hpp +++ b/projects/clr/rocclr/device/rocm/rocblit.hpp @@ -429,12 +429,14 @@ class KernelBlitManager : public DmaBlitManager { //! Stream memory write operation - Write a 'value' at 'memory'. bool streamOpsWrite(device::Memory& memory, //!< Memory to write the 'value' uint64_t value, + size_t offset, size_t sizeBytes ) const; //! Stream memory ops- Waits for a 'value' at 'memory' and wait is released based on compare op. 
bool streamOpsWait(device::Memory& memory, //!< Memory contents to compare the 'value' against uint64_t value, + size_t offset, size_t sizeBytes, uint64_t flags, uint64_t mask @@ -483,7 +485,7 @@ class KernelBlitManager : public DmaBlitManager { void releaseArguments(address args) const; inline void setArgument(amd::Kernel* kernel, size_t index, - size_t size, const void* value, uint32_t offset = 0, + size_t size, const void* value, size_t offset = 0, const device::Memory* dev_mem = nullptr) const; uint32_t ConstantBufferOffset() const { @@ -528,7 +530,7 @@ static const char* BlitName[KernelBlitManager::BlitTotal] = { }; inline void KernelBlitManager::setArgument(amd::Kernel* kernel, size_t index, - size_t size, const void* value, uint32_t offset, + size_t size, const void* value, size_t offset, const device::Memory* dev_mem) const { const amd::KernelParameterDescriptor& desc = kernel->signature().at(index); diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 21b95a9209..7f36548cc2 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -2394,8 +2394,8 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { else { // mask is applied on value before performing // the comparision defined by 'condition' - bool result = static_cast<KernelBlitManager&>(blitMgr()).streamOpsWait(*memory, value, - sizeBytes, flags, mask); + bool result = static_cast<KernelBlitManager&>(blitMgr()).streamOpsWait(*memory, value, offset, + sizeBytes, flags, mask); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Waiting for value: 0x%lx." 
" Flags: 0x%lx mask: 0x%lx", value, flags, mask); if (!result) { @@ -2405,12 +2405,11 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { } else if (type == ROCCLR_COMMAND_STREAM_WRITE_VALUE) { amd::Coord3D origin(offset); amd::Coord3D size(sizeBytes); - // Ensure memory ordering preceding the write dispatchBarrierPacket(kBarrierPacketReleaseHeader); bool result = static_cast<KernelBlitManager&>(blitMgr()).streamOpsWrite(*memory, value, - sizeBytes); + offset, sizeBytes); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writing value: 0x%lx", value); if (!result) { LogError("submitStreamOperation: Write failed!");