From f57da595ffbecb6c9bf2c1f33b2e9aa58c10d2f5 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 3 Nov 2020 01:14:36 +0000 Subject: [PATCH] Fix hipmemset to match CUDA behavior Make hipMemset asynchronous with respect to the host unless destination pointer refers to pinned host memory. Change-Id: I5ba230e0cc622aa076dc94692a4c43a57dd2df7b [ROCm/hip commit: 08f827b28b0886ad45cb3b3a6cc4c26a0fbdc409] --- projects/hip/rocclr/hip_memory.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/projects/hip/rocclr/hip_memory.cpp b/projects/hip/rocclr/hip_memory.cpp index 818cb23494..66d952b75e 100755 --- a/projects/hip/rocclr/hip_memory.cpp +++ b/projects/hip/rocclr/hip_memory.cpp @@ -198,7 +198,24 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin if ((srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) && ((srcMemory->getContext().devices().size() == 1) && (dstMemory->getContext().devices().size() == 1))) { - command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + amd::HostQueue* pQueue = &queue; + + if (queueDevice != dstMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(dstMemory->getContext()); + amd::Command* cmd = queue.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } + } + + if (queueDevice != srcMemory->getContext().devices()[0]) { + pQueue = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = queue.getLastQueuedCommand(true); + if (cmd != nullptr) { + waitList.push_back(cmd); + } + } + command = new amd::CopyMemoryP2PCommand(*pQueue, CL_COMMAND_COPY_BUFFER, waitList, *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); if (command == nullptr) { return hipErrorOutOfMemory; @@ -1779,6 +1796,11 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt return hipErrorInvalidValue; } + if (!((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) & + memory->getMemFlags())) { + isAsync = true; + } + hipError_t hip_error = hipSuccess; amd::HostQueue* queue = hip::getQueue(stream);