diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 58b58e2c99..5fa9be0fbc 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -440,9 +440,13 @@ public: hc::accelerator_view _av; unsigned _flags; -private: // Critical Data. THis MUST be accessed through LockedAccessor_StreamCrit_t +private: + // Critical Data. THis MUST be accessed through LockedAccessor_StreamCrit_t ihipStreamCritical_t _criticalData; + // Array of dependency completion_future. + std::vector _depFutures; + private: void enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal); void waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 44cccf14fa..64d529f875 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -181,6 +181,8 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty // Reset the stream to "empty" - next command will not set up an inpute dependency on any older signal. crit->_last_command_type = ihipCommandCopyH2D; crit->_last_copy_signal = NULL; + + _depFutures.clear(); } @@ -428,6 +430,9 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t needSync = 1; hsa_signal_t *hsaSignal = (static_cast (crit->_last_kernel_future.get_native_handle())); if (hsaSignal) { + // Keep reference to the kernel future in order to keep the + // dependent signal alive. + _depFutures.push_back(crit->_last_kernel_future); *waitSignal = * hsaSignal; } else { assert(0); // if NULL signal, and we return 1, hsa_amd_memory_copy_async will fail. Confirm this never happens.