From 7530fa6dbea6bdbc3df9c39d52235e80cd644aed Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 19 Sep 2016 17:09:50 -0500 Subject: [PATCH] Remove HIP command dependency tracking. Change-Id: I991c13bc5108193959ba70f9f6f9c692c9ad3a5b --- include/hcc_detail/hip_hcc.h | 15 ---------- src/hip_event.cpp | 2 -- src/hip_hcc.cpp | 58 ++---------------------------------- 3 files changed, 3 insertions(+), 72 deletions(-) diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 01f1b01313..d853c59c3f 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -373,8 +373,6 @@ class ihipStreamCriticalBase_t : public LockedBase { public: ihipStreamCriticalBase_t(hc::accelerator_view av) : - _last_command_type(ihipCommandCopyH2H), - _last_copy_signal(NULL), _signalCursor(0), _oldest_live_sig_id(1), _streamSigId(0), @@ -392,15 +390,6 @@ public: ihipStreamCriticalBase_t * mlock() { LockedBase::lock(); return this;}; public: - // Critical Data: - ihipCommand_t _last_command_type; // type of the last command - - // signal of last copy command sent to the stream. - // May be NULL, indicating the previous command has completley finished and future commands don't need to create a dependency. - // Copy can be either H2D or D2H. - ihipSignal_t *_last_copy_signal; - - hc::completion_future _last_kernel_future; // Completion future of last kernel command sent to GPU. // Signal pool: int _signalCursor; @@ -444,7 +433,6 @@ typedef uint64_t SeqNum_t ; void lockclose_postKernelCommand(hc::completion_future &kernel_future); - void locked_reclaimSignals(SIGSEQNUM sigNum); void locked_wait(bool assertQueueEmpty=false); hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); }; @@ -467,7 +455,6 @@ typedef uint64_t SeqNum_t ; void *kernarg, size_t kernSize, uint64_t kernel); // Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function. - SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; }; ihipSignal_t * allocSignal (LockedAccessor_StreamCrit_t &crit); @@ -526,8 +513,6 @@ struct ihipEvent_t { hc::completion_future _marker; uint64_t _timestamp; // store timestamp, may be set on host or by marker. - - SIGSEQNUM _copySeqId; } ; diff --git a/src/hip_event.cpp b/src/hip_event.cpp index 77d33cb6c2..52b25fc19b 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -39,7 +39,6 @@ hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) eh->_stream = NULL; eh->_flags = flags; eh->_timestamp = 0; - eh->_copySeqId = 0; *event = eh; // TODO - allocat the event directly, no copy needed. } else { e = hipErrorInvalidValue; @@ -123,7 +122,6 @@ hipError_t hipEventSynchronize(hipEvent_t event) return ihipLogStatus(hipSuccess); } else { event->_marker.wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); - event->_stream->locked_reclaimSignals(event->_copySeqId); return ihipLogStatus(hipSuccess); } diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index ad76dbce77..995efb584c 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -212,20 +212,6 @@ ihipStream_t::~ihipStream_t() } -//--- -//TODO - this function is dangerous since it does not propertly account -//for younger commands which may be depending on the signals we are reclaiming. -//Will fix when we move to HCC management of copy signals. -void ihipStream_t::locked_reclaimSignals(SIGSEQNUM sigNum) -{ - LockedAccessor_StreamCrit_t crit(_criticalData); - - tprintf(DB_SIGNAL, "reclaim signal #%lu\n", sigNum); - // Mark all signals older and including this one as available for re-allocation. - crit->_oldest_live_sig_id = sigNum+1; -} - - //--- void ihipStream_t::waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal) { @@ -253,17 +239,7 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty waitOnAllCFs(crit); } - if (crit->_last_copy_signal) { - tprintf (DB_SYNC, "stream %p wait for lastCopy:#%lu...\n", this, lastCopySeqId(crit) ); - this->waitCopy(crit, crit->_last_copy_signal); - } - crit->_kernelCnt = 0; - - // Reset the stream to "empty" - next command will not set up an inpute dependency on any older signal. - crit->_last_command_type = ihipCommandCopyH2D; - crit->_last_copy_signal = NULL; -// crit->_signalCnt = 0; } void ihipStream_t::addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future *cf) @@ -309,7 +285,6 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event) LockedAccessor_StreamCrit_t crit(_criticalData); event->_marker = crit->_av.create_marker(); - event->_copySeqId = lastCopySeqId(crit); } //============================================================================= @@ -422,42 +397,16 @@ void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal, i int HIP_NUM_KERNELS_INFLIGHT = 128; //-- -//When the commands in a stream change types (ie kernel command follows a data command, -//or data command follows a kernel command), then we need to add a barrier packet -//into the stream to mimic CUDA stream semantics. (some hardware uses separate -//queues for data commands and kernel commands, and no implicit ordering is provided). -// +// Lock the stream to prevent other threads from intervening. LockedAccessor_StreamCrit_t ihipStream_t::lockopen_preKernelCommand() { LockedAccessor_StreamCrit_t crit(_criticalData, false/*no unlock at destruction*/); - if(crit->_kernelCnt > HIP_NUM_KERNELS_INFLIGHT){ - this->wait(crit); + this->wait(crit); crit->_kernelCnt = 0; } crit->_kernelCnt++; - // If switching command types, we need to add a barrier packet to synchronize things. - if (crit->_last_command_type != ihipCommandKernel) { - if (crit->_last_copy_signal) { - - hsa_queue_t * q = (hsa_queue_t*) (crit->_av.get_hsa_queue()); - if (HIP_DISABLE_HW_KERNEL_DEP == 0) { - this->enqueueBarrier(q, crit->_last_copy_signal, NULL); - tprintf (DB_SYNC, "stream %p switch %s to %s (barrier pkt inserted with wait on #%lu)\n", - this, ihipCommandName[crit->_last_command_type], ihipCommandName[ihipCommandKernel], crit->_last_copy_signal->_sigId) - - } else if (HIP_DISABLE_HW_KERNEL_DEP>0) { - tprintf (DB_SYNC, "stream %p switch %s to %s (HOST wait for previous...)\n", - this, ihipCommandName[crit->_last_command_type], ihipCommandName[ihipCommandKernel]); - this->waitCopy(crit, crit->_last_copy_signal); - } else if (HIP_DISABLE_HW_KERNEL_DEP==-1) { - tprintf (DB_SYNC, "stream %p switch %s to %s (IGNORE dependency)\n", - this, ihipCommandName[crit->_last_command_type], ihipCommandName[ihipCommandKernel]); - } - } - crit->_last_command_type = ihipCommandKernel; - } return crit; } @@ -467,8 +416,6 @@ LockedAccessor_StreamCrit_t ihipStream_t::lockopen_preKernelCommand() // Must be called after kernel finishes, this releases the lock on the stream so other commands can submit. void ihipStream_t::lockclose_postKernelCommand(hc::completion_future &kernelFuture) { - // We locked _criticalData in the lockopen_preKernelCommand() so OK to access here: - _criticalData._last_kernel_future = kernelFuture; if (HIP_LAUNCH_BLOCKING) { kernelFuture.wait(); @@ -1217,6 +1164,7 @@ void ihipInit() READ_ENV_I(release, HIP_PRINT_ENV, 0, "Print HIP environment variables."); //-- READ HIP_PRINT_ENV env first, since it has impact on later env var reading + // TODO: In HIP/hcc, this variable blocks after both kernel commmands and data transfer. Maybe should be bit-mask for each command type? READ_ENV_I(release, HIP_LAUNCH_BLOCKING, CUDA_LAUNCH_BLOCKING, "Make HIP APIs 'host-synchronous', so they block until any kernel launches or data copy commands complete. Alias: CUDA_LAUNCH_BLOCKING." ); READ_ENV_I(release, HIP_DB, 0, "Print various debug info. Bitmask, see hip_hcc.cpp for more information."); if ((HIP_DB & (1<