diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index c87e201c0c..3bb3d6d128 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -243,7 +243,7 @@ ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int _id(0), // will be set by add function. _flags(flags), _ctx(ctx), - _criticalData(av) + _criticalData(this, av) { unsigned schedBits = ctx->_ctxFlags & hipDeviceScheduleMask; @@ -256,7 +256,6 @@ ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int }; - tprintf(DB_SYNC, " streamCreate: stream=%s\n", ToString(this).c_str()); }; @@ -271,7 +270,7 @@ ihipStream_t::~ihipStream_t() void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty) { if (! assertQueueEmpty) { - tprintf (DB_SYNC, "stream %s wait for queue-empty..\n", ToString(this).c_str()); + tprintf (DB_SYNC, "%s wait for queue-empty..\n", ToString(this).c_str()); hc::hcWaitMode waitMode = hc::hcWaitModeActive; if (_scheduleMode == Auto) { @@ -406,21 +405,21 @@ void ihipStream_t::lockclose_postKernelCommand(const char * kernelName, hc::acce - //============================================================================= - // Recompute the peercnt and the packed _peerAgents whenever a peer is added or deleted. - // The packed _peerAgents can efficiently be used on each memory allocation. - template<> - void ihipCtxCriticalBase_t::recomputePeerAgents() - { - _peerCnt = 0; - std::for_each (_peers.begin(), _peers.end(), [this](ihipCtx_t* ctx) { - _peerAgents[_peerCnt++] = ctx->getDevice()->_hsaAgent; - }); - } +//============================================================================= +// Recompute the peercnt and the packed _peerAgents whenever a peer is added or deleted. +// The packed _peerAgents can efficiently be used on each memory allocation. +template<> +void ihipCtxCriticalBase_t::recomputePeerAgents() +{ + _peerCnt = 0; + std::for_each (_peers.begin(), _peers.end(), [this](ihipCtx_t* ctx) { + _peerAgents[_peerCnt++] = ctx->getDevice()->_hsaAgent; + }); +} - template<> - bool ihipCtxCriticalBase_t::isPeerWatcher(const ihipCtx_t *peer) +template<> +bool ihipCtxCriticalBase_t::isPeerWatcher(const ihipCtx_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); return (match != std::end(_peers)); @@ -489,6 +488,7 @@ void ihipCtxCriticalBase_t::addStream(ihipStream_t *stream) { stream->_id = _streams.size(); _streams.push_back(stream); + tprintf(DB_SYNC, " addStream: %s\n", ToString(stream).c_str()); } //============================================================================= @@ -827,11 +827,11 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) ihipCtx_t::ihipCtx_t(ihipDevice_t *device, unsigned deviceCnt, unsigned flags) : _ctxFlags(flags), _device(device), - _criticalData(deviceCnt) + _criticalData(this, deviceCnt) { locked_reset(); - tprintf(DB_SYNC, "created ctx with defaultStream=%p\n", _defaultStream); + tprintf(DB_SYNC, "created ctx with defaultStream=%p (%s)\n", _defaultStream, ToString(_defaultStream).c_str()); }; @@ -861,7 +861,7 @@ void ihipCtx_t::locked_reset() for (auto streamI=crit->const_streams().begin(); streamI!=crit->const_streams().end(); streamI++) { ihipStream_t *stream = *streamI; (*streamI)->locked_wait(); - tprintf(DB_SYNC, " delete stream=%p\n", stream); + tprintf(DB_SYNC, " delete %s\n", ToString(stream).c_str()); delete stream; } @@ -905,15 +905,24 @@ ihipCtx_t::stealActiveQueue(LockedAccessor_CtxCrit_t &ctxCrit, ihipStream_t *nee for (auto iter=ctxCrit->streams().begin(); iter != ctxCrit->streams().end(); iter++) { if (*iter != needyStream) { auto victimCritPtr = (*iter)->_criticalData.mtry_lock(); - if (victimCritPtr && victimCritPtr->_hasQueue && (victimCritPtr->_kernelCnt == 0)) { + if (victimCritPtr) { + if (victimCritPtr->_hasQueue && (victimCritPtr->_kernelCnt == 0)) { + victimCritPtr->_hasQueue = false; - victimCritPtr->_hasQueue = false; + tprintf(DB_SYNC, " stealActiveQueue from victim:%s to needy:%s\n", + ToString(*iter).c_str(), ToString(needyStream).c_str()); - tprintf(DB_SYNC, " stealActiveQueue move queue from victim:%s to needy:%s\n", - ToString(*iter).c_str(), ToString(needyStream).c_str()); + // TODO - cleanup to remove forced setting to N + hc::accelerator_view av = victimCritPtr->_av; + uint64_t *p = (uint64_t*)(&victimCritPtr->_av); + *p = 0; // damage the victim av so attempt to use it will fault. - return victimCritPtr->_av; + (*iter)->_criticalData.munlock(); + return av; + } else { + (*iter)->_criticalData.munlock(); + } } } } @@ -1415,7 +1424,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) } else { // ALl streams have to wait for legacy default stream to be empty: if (!(stream->_flags & hipStreamNonBlocking)) { - tprintf(DB_SYNC, "stream %p wait default stream\n", stream); + tprintf(DB_SYNC, "%s wait default stream\n", ToString(stream).c_str()); stream->getCtx()->_defaultStream->locked_wait(); } diff --git a/src/hip_hcc.h b/src/hip_hcc.h index f2a2fb49fa..876e5df816 100644 --- a/src/hip_hcc.h +++ b/src/hip_hcc.h @@ -292,6 +292,34 @@ extern "C" { const hipStream_t hipStreamNull = 0x0; +/** + * HIP IPC Handle Size + */ +#define HIP_IPC_HANDLE_SIZE 64 +class ihipIpcMemHandle_t +{ +public: +#if USE_IPC + hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr +#endif + char reserved[HIP_IPC_HANDLE_SIZE]; + size_t psize; +}; + + +class ihipModule_t { +public: + hsa_executable_t executable; + hsa_code_object_t object; + std::string fileName; + void *ptr; + size_t size; + + ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {} +}; + + +//--- // Used to remove lock, for performance or stimulating bugs. class FakeMutex { @@ -330,21 +358,21 @@ public: _autoUnlock(autoUnlock) { - tprintf(DB_SYNC, "lock critical data %s.%p\n", typeid(T).name(), _criticalData); + tprintf(DB_SYNC, "lock criticalData=%p for %s\n", _criticalData, ToString(_criticalData->_parent).c_str()); _criticalData->_mutex.lock(); }; ~LockedAccessor() { if (_autoUnlock) { - tprintf(DB_SYNC, "auto-unlock critical data %s.%p\n",typeid(T).name(), _criticalData); + tprintf(DB_SYNC, "auto-unlock criticalData=%p for %s\n", _criticalData, ToString(_criticalData->_parent).c_str()); _criticalData->_mutex.unlock(); } } void unlock() { - tprintf(DB_SYNC, "unlock critical data %s.%p\n", typeid(T).name(), _criticalData); + tprintf(DB_SYNC, "unlock criticalData=%p for %s\n", _criticalData, ToString(_criticalData->_parent).c_str()); _criticalData->_mutex.unlock(); } @@ -369,40 +397,16 @@ struct LockedBase { MUTEX_TYPE _mutex; }; -/** - * HIP IPC Handle Size - */ -#define HIP_IPC_HANDLE_SIZE 64 -class ihipIpcMemHandle_t -{ -public: -#if USE_IPC - hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr -#endif - char reserved[HIP_IPC_HANDLE_SIZE]; - size_t psize; -}; - - -class ihipModule_t { -public: - hsa_executable_t executable; - hsa_code_object_t object; - std::string fileName; - void *ptr; - size_t size; - - ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0) {} -}; template class ihipStreamCriticalBase_t : public LockedBase { public: - ihipStreamCriticalBase_t(hc::accelerator_view av) : + ihipStreamCriticalBase_t(ihipStream_t *parentStream, hc::accelerator_view av) : _kernelCnt(0), _av(av), - _hasQueue(true) + _hasQueue(true), + _parent(parentStream) { }; @@ -410,11 +414,20 @@ public: } ihipStreamCriticalBase_t * mlock() { LockedBase::lock(); return this;}; + + void munlock() { + tprintf(DB_SYNC, "munlock criticalData=%p for %s\n", this, ToString(this->_parent).c_str()); + LockedBase::unlock(); + }; + ihipStreamCriticalBase_t * mtry_lock() { - return LockedBase::try_lock() ? this: nullptr; + bool gotLock = LockedBase::try_lock() ; + tprintf(DB_SYNC, "mtry_lock=%d criticalData=%p for %s\n", gotLock, this, ToString(this->_parent).c_str()); + return gotLock ? this: nullptr; }; public: + ihipStream_t * _parent; uint32_t _kernelCnt; // Count of inflight kernels in this stream. Reset at ::wait(). hc::accelerator_view _av; @@ -596,8 +609,9 @@ template class ihipCtxCriticalBase_t : LockedBase { public: - ihipCtxCriticalBase_t(unsigned deviceCnt) : - _peerCnt(0) + ihipCtxCriticalBase_t(ihipCtx_t *parentCtx, unsigned deviceCnt) : + _parent(parentCtx), + _peerCnt(0) { _peerAgents = new hsa_agent_t[deviceCnt]; }; @@ -633,6 +647,8 @@ public: friend class LockedAccessor; private: + ihipCtx_t * _parent; + //--- Stream Tracker: std::list< ihipStream_t* > _streams; // streams associated with this device. @@ -739,7 +755,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t); // Stream printf functions: inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s) { - os << "stream#"; + os << "stream:"; os << s.getDevice()->_deviceId;; os << '.'; os << s._id; diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 74578e9b4b..5bc77cf543 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -131,7 +131,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) LockedAccessor_CtxCrit_t crit(ctx->criticalData()); // the peerCnt always stores self so make sure the trace actually peerCnt = crit->peerCnt(); - tprintf(DB_MEM, " allocated device_mem ptr:%p size:%zu on dev:%d and allowed %d other peer(s) access\n", + tprintf(DB_MEM, " allocated device_mem ptr:%p size:%zu on dev:%d and allow access to %d other peer(s)\n", *ptr, sizeBytes, device->_deviceId, peerCnt-1); if (peerCnt > 1) { @@ -841,7 +841,6 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s if (HIP_API_BLOCKING) { tprintf (DB_SYNC, "%s LAUNCH_BLOCKING wait for hipMemsetAsync.\n", ToString(stream).c_str()); cf.wait(); - //tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed [stream:%p].\n", __func__, (void*)stream); } } else { e = hipErrorInvalidValue; @@ -892,9 +891,9 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) if (HIP_LAUNCH_BLOCKING) { - tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset [stream:%p].\n", __func__, (void*)stream); + tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__, ToString(stream).c_str()); cf.wait(); - tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed [stream:%p].\n", __func__, (void*)stream); + tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__, ToString(stream).c_str()); } } else { e = hipErrorInvalidValue; diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index d754ffe5f6..8641f72265 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -54,7 +54,7 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags) *stream = istream; } - tprintf(DB_SYNC, "hipStreamCreate, stream=%p\n", *stream); + tprintf(DB_SYNC, "hipStreamCreate, %s\n", ToString(*stream).c_str()); } else { e = hipErrorInvalidDevice; }