From 45306fcdf6fdf3b8a464df70664e1b3766576986 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Fri, 17 Apr 2020 10:42:46 -0400 Subject: [PATCH] SWDEV-231579 - [hipclang-vdi-rocm][perf] - HIPPerfDispatchSpeed disparity between HIP/HCC vs HIP/VDI Insert a wait marker command in the default stream only when HIP has pending operations on other async streams Change-Id: I68660a54867fab7571ba57eb1df5feb1bca1c61a [ROCm/hip commit: db70fc66b7cdec8fb749e78f6ecc28b1a2336991] --- projects/hip/vdi/hip_context.cpp | 27 ++++++++++++-- projects/hip/vdi/hip_device.cpp | 2 ++ projects/hip/vdi/hip_device_runtime.cpp | 2 -- projects/hip/vdi/hip_event.cpp | 3 +- projects/hip/vdi/hip_internal.hpp | 10 +++--- projects/hip/vdi/hip_memory.cpp | 5 --- projects/hip/vdi/hip_stream.cpp | 48 ++++++++++++++++++++----- 7 files changed, 72 insertions(+), 25 deletions(-) diff --git a/projects/hip/vdi/hip_context.cpp b/projects/hip/vdi/hip_context.cpp index 1e2ae46fd5..8869bb07ff 100755 --- a/projects/hip/vdi/hip_context.cpp +++ b/projects/hip/vdi/hip_context.cpp @@ -80,13 +80,36 @@ void setCurrentDevice(unsigned int index) { amd::HostQueue* getQueue(hipStream_t stream) { if (stream == nullptr) { - syncStreams(); return getNullStream(); } else { hip::Stream* s = reinterpret_cast(stream); + // Wait for null stream if ((s->flags & hipStreamNonBlocking) == 0) { - getNullStream()->finish(); + amd::HostQueue* nullStream = getNullStream(); + amd::Command::EventWaitList eventWaitList; + + amd::Command* command = nullStream->getLastQueuedCommand(true); + if ((command != nullptr) && + // Check the current active status + (command->status() != CL_COMPLETE)) { + eventWaitList.push_back(command); + } + + // Check if we have to wait anything + if (eventWaitList.size() > 0) { + amd::Command* command = new amd::Marker(*s->asHostQueue(), false, eventWaitList); + if (command != nullptr) { + command->enqueue(); + command->release(); + } + } + + // Release all active commands. It's safe after the marker was enqueued + for (const auto& it : eventWaitList) { + it->release(); + } } + return s->asHostQueue(); } } diff --git a/projects/hip/vdi/hip_device.cpp b/projects/hip/vdi/hip_device.cpp index 80e247f37c..c01dd5f195 100644 --- a/projects/hip/vdi/hip_device.cpp +++ b/projects/hip/vdi/hip_device.cpp @@ -35,6 +35,8 @@ amd::HostQueue* Device::defaultStream() { return nullptr; } } + // Wait for all active streams before executing commands on the default + iHipWaitActiveStreams(defaultStream_); return defaultStream_; } diff --git a/projects/hip/vdi/hip_device_runtime.cpp b/projects/hip/vdi/hip_device_runtime.cpp index febf64d116..4cd6731824 100644 --- a/projects/hip/vdi/hip_device_runtime.cpp +++ b/projects/hip/vdi/hip_device_runtime.cpp @@ -432,8 +432,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { hipError_t hipDeviceSynchronize ( void ) { HIP_INIT_API(hipDeviceSynchronize); - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); if (!queue) { diff --git a/projects/hip/vdi/hip_event.cpp b/projects/hip/vdi/hip_event.cpp index 677becd67e..0cd061c1e8 100644 --- a/projects/hip/vdi/hip_event.cpp +++ b/projects/hip/vdi/hip_event.cpp @@ -222,8 +222,7 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { hip::Stream* s = reinterpret_cast(stream); amd::HostQueue* queue = hip::getQueue(stream); - amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ? - queue->getLastQueuedCommand(true) : nullptr; + amd::Command* command = queue->getLastQueuedCommand(true); if (command == nullptr) { command = new amd::Marker(*queue, false); diff --git a/projects/hip/vdi/hip_internal.hpp b/projects/hip/vdi/hip_internal.hpp index 10819350f5..0d0caada14 100755 --- a/projects/hip/vdi/hip_internal.hpp +++ b/projects/hip/vdi/hip_internal.hpp @@ -143,11 +143,6 @@ namespace hip { extern amd::HostQueue* getNullStream(amd::Context&); /// Get default stream of the thread extern amd::HostQueue* getNullStream(); - /// Sync Blocking streams on the current device - extern void syncStreams(); - /// Sync blocking streams on the given device - extern void syncStreams(int devId); - struct Function { amd::Kernel* function_; @@ -289,9 +284,12 @@ public: void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); void popExec(ihipExec_t& exec); - }; +/// Wait all active streams on the blocking queue. The method enqueues a wait command and +/// doesn't stall the current thread +extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue); + extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/projects/hip/vdi/hip_memory.cpp b/projects/hip/vdi/hip_memory.cpp index 9debd91bf6..4178cea93e 100755 --- a/projects/hip/vdi/hip_memory.cpp +++ b/projects/hip/vdi/hip_memory.cpp @@ -52,7 +52,6 @@ hipError_t ihipFree(void *ptr) if (queue != nullptr) { queue->finish(); } - hip::syncStreams(dev->deviceId()); } amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); return hipSuccess; @@ -240,7 +239,6 @@ hipError_t hipFree(void* ptr) { hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); - hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, *queue)); } @@ -289,7 +287,6 @@ hipError_t ihipArrayDestroy(hipArray* array) { if (queue != nullptr) { queue->finish(); } - hip::syncStreams(dev->deviceId()); } as_amd(memObj)->release(); @@ -691,7 +688,6 @@ hipError_t hipHostUnregister(void* hostPtr) { if (queue != nullptr) { queue->finish(); } - hip::syncStreams(dev->deviceId()); } if (amd::SvmBuffer::malloced(hostPtr)) { @@ -1917,7 +1913,6 @@ hipError_t hipIpcCloseMemHandle(void* dev_ptr) { amd::Device* device = nullptr; amd::Memory* amd_mem_obj = nullptr; - hip::syncStreams(); hip::getNullStream()->finish(); if (dev_ptr == nullptr) { diff --git a/projects/hip/vdi/hip_stream.cpp b/projects/hip/vdi/hip_stream.cpp index eac42c0203..aefddef17f 100644 --- a/projects/hip/vdi/hip_stream.cpp +++ b/projects/hip/vdi/hip_stream.cpp @@ -42,20 +42,16 @@ class StreamCallback { namespace hip { -void syncStreams(int devId) { +void syncStreams() { amd::ScopedLock lock(streamSetLock); for (const auto& it : streamSet) { - if (it->device->deviceId() == devId) { + if (it->device->deviceId() == getCurrentDevice()->deviceId()) { it->finish(); } } } -void syncStreams() { - syncStreams(getCurrentDevice()->deviceId()); -} - Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {} @@ -89,6 +85,44 @@ void Stream::finish() { }; +void iHipWaitActiveStreams(amd::HostQueue* blocking_queue) { + amd::Command::EventWaitList eventWaitList; + { + amd::ScopedLock lock(streamSetLock); + + for (const auto& it : streamSet) { + // If it's the current device + if ((it->queue != nullptr) && (&it->queue->device() == &blocking_queue->device()) && + // and it's a blocking streamclan + ((it->flags & hipStreamNonBlocking) == 0) && + // and it's not the current stream + (it->asHostQueue() != blocking_queue)) { + // Get the last valid so command + amd::Command* command = it->asHostQueue()->getLastQueuedCommand(true); + if ((command != nullptr) && + // Check the current active status + (command->status() != CL_COMPLETE)) { + eventWaitList.push_back(command); + } + } + } + } + + // Check if we have to wait anything + if (eventWaitList.size() > 0) { + amd::Command* command = new amd::Marker(*blocking_queue, false, eventWaitList); + if (command != nullptr) { + command->enqueue(); + command->release(); + } + } + + // Release all active commands. It's safe after the marker was enqueued + for (const auto& it : eventWaitList) { + it->release(); + } +} + void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { hipError_t status = hipSuccess; StreamCallback* cbo = reinterpret_cast(user_data); @@ -270,5 +304,3 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback HIP_RETURN(hipSuccess); } - -