Fix hipMemset to match CUDA behavior

Make hipMemset asynchronous with respect to the host unless the
destination pointer refers to pinned host memory.

Change-Id: I5ba230e0cc622aa076dc94692a4c43a57dd2df7b


[ROCm/hip commit: 08f827b28b]
This commit is contained in:
Rahul Garg
2020-11-03 01:14:36 +00:00
parent 3e7e94ce07
commit f57da595ff
+23 -1
View File
@@ -198,7 +198,24 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
if ((srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) &&
((srcMemory->getContext().devices().size() == 1) &&
(dstMemory->getContext().devices().size() == 1))) {
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
amd::HostQueue* pQueue = &queue;
if (queueDevice != dstMemory->getContext().devices()[0]) {
pQueue = hip::getNullStream(dstMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
}
if (queueDevice != srcMemory->getContext().devices()[0]) {
pQueue = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
}
command = new amd::CopyMemoryP2PCommand(*pQueue, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes);
if (command == nullptr) {
return hipErrorOutOfMemory;
@@ -1779,6 +1796,11 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt
return hipErrorInvalidValue;
}
if (!((CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_USE_HOST_PTR) &
memory->getMemFlags())) {
isAsync = true;
}
hipError_t hip_error = hipSuccess;
amd::HostQueue* queue = hip::getQueue(stream);