From df98fd8531e7b099b88bb0f0f9bdbad33abd64e9 Mon Sep 17 00:00:00 2001 From: bwicakso Date: Wed, 20 Apr 2016 15:51:39 -0500 Subject: [PATCH] Fix for kernel synchronization The completion future of a particular kernel is lost if there are multiple kernels in the stream. This can cause a racing condition where the signal associated with the unreferenced completion_future might get released by hcc runtime. --- include/hcc_detail/hip_hcc.h | 6 +++++- src/hip_hcc.cpp | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 58b58e2c99..5fa9be0fbc 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -440,9 +440,13 @@ public: hc::accelerator_view _av; unsigned _flags; -private: // Critical Data. THis MUST be accessed through LockedAccessor_StreamCrit_t +private: + // Critical Data. THis MUST be accessed through LockedAccessor_StreamCrit_t ihipStreamCritical_t _criticalData; + // Array of dependency completion_future. + std::vector _depFutures; + private: void enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal); void waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 44cccf14fa..64d529f875 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -181,6 +181,8 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty // Reset the stream to "empty" - next command will not set up an inpute dependency on any older signal. crit->_last_command_type = ihipCommandCopyH2D; crit->_last_copy_signal = NULL; + + _depFutures.clear(); } @@ -428,6 +430,9 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t needSync = 1; hsa_signal_t *hsaSignal = (static_cast (crit->_last_kernel_future.get_native_handle())); if (hsaSignal) { + // Keep reference to the kernel future in order to keep the + // dependent signal alive. + _depFutures.push_back(crit->_last_kernel_future); *waitSignal = * hsaSignal; } else { assert(0); // if NULL signal, and we return 1, hsa_amd_memory_copy_async will fail. Confirm this never happens.