From fa6ed89cff5afa2a40efb58f74ade370ffa19c56 Mon Sep 17 00:00:00 2001 From: Christophe Paquot Date: Wed, 26 Feb 2020 08:41:18 -0800 Subject: [PATCH] Blocking and default streams' sync: Add hip::syncStreams(dev) to sync blocking streams on a given device. hip::syncStreams(void) should only sync streams on the current device. Change-Id: Ib6b0735215fa0ed12c646ebd029e9763ee3712ce --- vdi/hip_internal.hpp | 3 ++- vdi/hip_memory.cpp | 22 ++++++++++++++-------- vdi/hip_stream.cpp | 10 ++++++++-- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp index ad05e51b47..86012438fa 100644 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -111,8 +111,9 @@ namespace hip { /// Get default stream of the thread extern amd::HostQueue* getNullStream(); /// Sync Blocking streams on the current device - /// TODO: It currently syncs all Blocking streams on all devices extern void syncStreams(); + /// Sync blocking streams on the given device + extern void syncStreams(int devId); struct Function { diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp index 77d48a4368..6d68cfd9f5 100644 --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -65,8 +65,10 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags) *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); if (*ptr == nullptr) { - hip::syncStreams(); - hip::getNullStream()->finish(); + for (auto& dev : g_devices) { + hip::getNullStream(*dev->asContext())->finish(); + hip::syncStreams(dev->deviceId()); + } *ptr = amd::SvmBuffer::malloc(*amdContext, flags, sizeBytes, amdContext->devices()[0]->info().memBaseAddrAlign_); if (*ptr == nullptr) { @@ -223,12 +225,12 @@ hipError_t hipFree(void* ptr) { HIP_RETURN(hipSuccess); } if (amd::SvmBuffer::malloced(ptr)) { - hip::syncStreams(); for (auto& dev : g_devices) { amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); if (queue != nullptr) { queue->finish(); } + hip::syncStreams(dev->deviceId()); } amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); HIP_RETURN(hipSuccess); @@ -676,9 +678,15 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) hipError_t hipHostUnregister(void* hostPtr) { HIP_INIT_API(hipHostUnregister, hostPtr); + for (auto& dev : g_devices) { + amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); + if (queue != nullptr) { + queue->finish(); + } + hip::syncStreams(dev->deviceId()); + } + if (amd::SvmBuffer::malloced(hostPtr)) { - hip::syncStreams(); - hip::getNullStream()->finish(); amd::SvmBuffer::free(*hip::host_device->asContext(), hostPtr); HIP_RETURN(hipSuccess); } else { @@ -686,8 +694,6 @@ hipError_t hipHostUnregister(void* hostPtr) { amd::Memory* mem = getMemoryObject(hostPtr, offset); if(mem) { - hip::syncStreams(); - hip::getNullStream()->finish(); for (const auto& device: hip::getCurrentDevice()->devices()) { const device::Memory* devMem = mem->getDeviceMemory(*device); amd::MemObjMap::RemoveMemObj(reinterpret_cast(devMem->virtualAddress())); @@ -2033,4 +2039,4 @@ hipError_t hipMemcpyHtoAAsync(hipArray* dstArray, HIP_INIT_API(hipMemcpyHtoAAsync, dstArray, dstOffset, srcHost, ByteCount, stream); HIP_RETURN(ihipMemcpyHtoA(srcHost, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); -} \ No newline at end of file +} diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp index 55a975cc5c..53311ae4ff 100644 --- a/vdi/hip_stream.cpp +++ b/vdi/hip_stream.cpp @@ -42,14 +42,20 @@ class StreamCallback { namespace hip { -void syncStreams() { +void syncStreams(int devId) { amd::ScopedLock lock(streamSetLock); for (const auto& it : streamSet) { - it->finish(); + if (it->device->deviceId() == devId) { + it->finish(); + } } } +void syncStreams() { + syncStreams(getCurrentDevice()->deviceId()); +} + Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : queue(nullptr), device(dev), priority(p), flags(f) {}