Remove HIP command dependency tracking.

Change-Id: I991c13bc5108193959ba70f9f6f9c692c9ad3a5b
Bu işleme şunda yer alıyor:
Ben Sander
2016-09-19 17:09:50 -05:00
ebeveyn 8c4cecf367
işleme 7530fa6dbe
3 değiştirilmiş dosya ile 3 ekleme ve 72 silme
-15
Dosyayı Görüntüle
@@ -373,8 +373,6 @@ class ihipStreamCriticalBase_t : public LockedBase<MUTEX_TYPE>
{
public:
ihipStreamCriticalBase_t(hc::accelerator_view av) :
_last_command_type(ihipCommandCopyH2H),
_last_copy_signal(NULL),
_signalCursor(0),
_oldest_live_sig_id(1),
_streamSigId(0),
@@ -392,15 +390,6 @@ public:
ihipStreamCriticalBase_t<StreamMutex> * mlock() { LockedBase<MUTEX_TYPE>::lock(); return this;};
public:
// Critical Data:
ihipCommand_t _last_command_type; // type of the last command
// signal of last copy command sent to the stream.
// May be NULL, indicating the previous command has completely finished and future commands don't need to create a dependency.
// Copy can be either H2D or D2H.
ihipSignal_t *_last_copy_signal;
hc::completion_future _last_kernel_future; // Completion future of last kernel command sent to GPU.
// Signal pool:
int _signalCursor;
@@ -444,7 +433,6 @@ typedef uint64_t SeqNum_t ;
void lockclose_postKernelCommand(hc::completion_future &kernel_future);
void locked_reclaimSignals(SIGSEQNUM sigNum);
void locked_wait(bool assertQueueEmpty=false);
hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
@@ -467,7 +455,6 @@ typedef uint64_t SeqNum_t ;
void *kernarg, size_t kernSize, uint64_t kernel);
// Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function.
SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; };
ihipSignal_t * allocSignal (LockedAccessor_StreamCrit_t &crit);
@@ -526,8 +513,6 @@ struct ihipEvent_t {
hc::completion_future _marker;
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
SIGSEQNUM _copySeqId;
} ;
-2
Dosyayı Görüntüle
@@ -39,7 +39,6 @@ hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
eh->_stream = NULL;
eh->_flags = flags;
eh->_timestamp = 0;
eh->_copySeqId = 0;
*event = eh; // TODO - allocat the event directly, no copy needed.
} else {
e = hipErrorInvalidValue;
@@ -123,7 +122,6 @@ hipError_t hipEventSynchronize(hipEvent_t event)
return ihipLogStatus(hipSuccess);
} else {
event->_marker.wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
event->_stream->locked_reclaimSignals(event->_copySeqId);
return ihipLogStatus(hipSuccess);
}
+3 -55
Dosyayı Görüntüle
@@ -212,20 +212,6 @@ ihipStream_t::~ihipStream_t()
}
//---
// Marks every signal with an id up to and including sigNum as reclaimable
// for this stream.
//
// A LockedAccessor is constructed over _criticalData first (presumably holding
// the stream lock until the end of the function - confirm against LockedAccessor),
// the request is traced on the DB_SIGNAL channel, and _oldest_live_sig_id is
// then moved to the first id after sigNum.
//
// TODO - this function is dangerous: it does not properly account for younger
// commands which may still depend on the signals being reclaimed. Will be fixed
// when copy-signal management moves to HCC.
void ihipStream_t::locked_reclaimSignals(SIGSEQNUM sigNum)
{
    LockedAccessor_StreamCrit_t streamCrit(_criticalData);

    tprintf(DB_SIGNAL, "reclaim signal #%lu\n", sigNum);

    // Everything at or below sigNum becomes available for re-allocation, so the
    // oldest id still considered live is the one immediately after it.
    const auto firstLiveId = sigNum + 1;
    streamCrit->_oldest_live_sig_id = firstLiveId;
}
//---
void ihipStream_t::waitCopy(LockedAccessor_StreamCrit_t &crit, ihipSignal_t *signal)
{
@@ -253,17 +239,7 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty
waitOnAllCFs(crit);
}
if (crit->_last_copy_signal) {
tprintf (DB_SYNC, "stream %p wait for lastCopy:#%lu...\n", this, lastCopySeqId(crit) );
this->waitCopy(crit, crit->_last_copy_signal);
}
crit->_kernelCnt = 0;
// Reset the stream to "empty" - next command will not set up an input dependency on any older signal.
crit->_last_command_type = ihipCommandCopyH2D;
crit->_last_copy_signal = NULL;
// crit->_signalCnt = 0;
}
void ihipStream_t::addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future *cf)
@@ -309,7 +285,6 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event)
LockedAccessor_StreamCrit_t crit(_criticalData);
event->_marker = crit->_av.create_marker();
event->_copySeqId = lastCopySeqId(crit);
}
//=============================================================================
@@ -422,42 +397,16 @@ void ihipStream_t::enqueueBarrier(hsa_queue_t* queue, ihipSignal_t *depSignal, i
// Per-stream kernel-count threshold: lockopen_preKernelCommand() waits on the
// stream and resets the stream's _kernelCnt once that counter exceeds this value.
int HIP_NUM_KERNELS_INFLIGHT = 128;
//--
// Prepares the stream for a kernel launch and returns the accessor over the
// stream's critical data to the caller.
//
// When the commands in a stream change types (i.e. a kernel command follows a data command,
// or a data command follows a kernel command), we need to add a barrier packet
// into the stream to mimic CUDA stream semantics. (Some hardware uses separate
// queues for data commands and kernel commands, and no implicit ordering is provided.)
//
// Lock the stream to prevent other threads from intervening.
//
// Steps performed here:
//   1. Throttle: if _kernelCnt exceeds HIP_NUM_KERNELS_INFLIGHT, wait on the stream
//      and reset the counter; then count this kernel.
//   2. If the previous command was not a kernel and a copy signal is outstanding,
//      order the kernel after that copy according to HIP_DISABLE_HW_KERNEL_DEP.
//   3. Record that the last command type is now a kernel.
//
// Returns: the accessor, constructed so that it does not unlock at destruction.
// NOTE(review): the matching unlock is presumably done by lockclose_postKernelCommand() - confirm.
LockedAccessor_StreamCrit_t ihipStream_t::lockopen_preKernelCommand()
{
LockedAccessor_StreamCrit_t crit(_criticalData, false/*no unlock at destruction*/);
// Too many kernels counted on this stream: wait on it before submitting more.
if(crit->_kernelCnt > HIP_NUM_KERNELS_INFLIGHT){
// NOTE(review): wait(crit) is called twice back to back; the second call looks
// redundant (possibly an artifact of how this text was produced) - confirm.
this->wait(crit);
this->wait(crit);
crit->_kernelCnt = 0;
}
crit->_kernelCnt++;
// If switching command types, we need to add a barrier packet to synchronize things.
if (crit->_last_command_type != ihipCommandKernel) {
// Only needed when a previous copy left a signal behind (non-NULL).
if (crit->_last_copy_signal) {
hsa_queue_t * q = (hsa_queue_t*) (crit->_av.get_hsa_queue());
// HIP_DISABLE_HW_KERNEL_DEP selects how the dependency on the last copy is enforced:
//    0  -> enqueue a barrier on the queue that waits on the last copy signal
//   >0  -> host-side wait on the last copy signal via waitCopy()
//   -1  -> dependency is ignored (only a trace message is emitted)
// Any other negative value matches no branch, so nothing is done and nothing is logged.
if (HIP_DISABLE_HW_KERNEL_DEP == 0) {
this->enqueueBarrier(q, crit->_last_copy_signal, NULL);
tprintf (DB_SYNC, "stream %p switch %s to %s (barrier pkt inserted with wait on #%lu)\n",
this, ihipCommandName[crit->_last_command_type], ihipCommandName[ihipCommandKernel], crit->_last_copy_signal->_sigId)
// NOTE(review): the tprintf call above has no trailing ';' - this only compiles if
// tprintf expands to a complete statement/block; confirm against its definition.
} else if (HIP_DISABLE_HW_KERNEL_DEP>0) {
tprintf (DB_SYNC, "stream %p switch %s to %s (HOST wait for previous...)\n",
this, ihipCommandName[crit->_last_command_type], ihipCommandName[ihipCommandKernel]);
this->waitCopy(crit, crit->_last_copy_signal);
} else if (HIP_DISABLE_HW_KERNEL_DEP==-1) {
tprintf (DB_SYNC, "stream %p switch %s to %s (IGNORE dependency)\n",
this, ihipCommandName[crit->_last_command_type], ihipCommandName[ihipCommandKernel]);
}
}
// Remember the switch so later kernels skip the dependency check above.
crit->_last_command_type = ihipCommandKernel;
}
// Hand the accessor back to the caller.
return crit;
}
@@ -467,8 +416,6 @@ LockedAccessor_StreamCrit_t ihipStream_t::lockopen_preKernelCommand()
// Must be called after kernel finishes, this releases the lock on the stream so other commands can submit.
void ihipStream_t::lockclose_postKernelCommand(hc::completion_future &kernelFuture)
{
// We locked _criticalData in the lockopen_preKernelCommand() so OK to access here:
_criticalData._last_kernel_future = kernelFuture;
if (HIP_LAUNCH_BLOCKING) {
kernelFuture.wait();
@@ -1217,6 +1164,7 @@ void ihipInit()
READ_ENV_I(release, HIP_PRINT_ENV, 0, "Print HIP environment variables.");
//-- READ HIP_PRINT_ENV env first, since it has impact on later env var reading
// TODO: In HIP/hcc, this variable blocks after both kernel commands and data transfers. Maybe it should be a bit-mask with one bit per command type?
READ_ENV_I(release, HIP_LAUNCH_BLOCKING, CUDA_LAUNCH_BLOCKING, "Make HIP APIs 'host-synchronous', so they block until any kernel launches or data copy commands complete. Alias: CUDA_LAUNCH_BLOCKING." );
READ_ENV_I(release, HIP_DB, 0, "Print various debug info. Bitmask, see hip_hcc.cpp for more information.");
if ((HIP_DB & (1<<DB_API)) && (HIP_TRACE_API == 0)) {