diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 611079f7c2..7b59e96975 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -47,6 +47,9 @@ THE SOFTWARE. #include "trace_helper.h" #include "env.h" +//TODO - create a stream-based debug interface as an additional option for tprintf +#define DB_PEER_CTX 0 + //================================================================================================= //Global variables: @@ -463,7 +466,9 @@ void ihipCtxCriticalBase_t::recomputePeerAgents() template<> bool ihipCtxCriticalBase_t::isPeerWatcher(const ihipCtx_t *peer) { - auto match = std::find(_peers.begin(), _peers.end(), peer); + auto match = std::find_if(_peers.begin(), _peers.end(), + [=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); }); + return (match != std::end(_peers)); } @@ -1679,6 +1684,9 @@ const char *ihipErrorString(hipError_t hip_error) // So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx. 
bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo) { + if (copyEngineCtx == nullptr) { + return false; + } // Make sure this is a device-to-device copy with all memory available to the requested copy engine // @@ -1686,11 +1694,18 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin if (dstPtrInfo->_sizeBytes == 0) { return false; } else { +#if USE_APP_PTR_FOR_CTX + ihipCtx_t *dstCtx = static_cast<ihipCtx_t *> (dstPtrInfo->_appPtr); +#else ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId); +#endif if (copyEngineCtx != dstCtx) { // Only checks peer list if contexts are different LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData()); - //tprintf(DB_SYNC, "dstCrit lock succeeded\n"); +#if DB_PEER_CTX + std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " dstCtx =" << dstCtx << " peerCnt=" + << ctxCrit->peerCnt() << "\n"; +#endif if (!ctxCrit->isPeerWatcher(copyEngineCtx)) { return false; }; @@ -1698,16 +1713,22 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin } - // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: if (srcPtrInfo->_sizeBytes == 0) { return false; } else { +#if USE_APP_PTR_FOR_CTX + ihipCtx_t *srcCtx = static_cast<ihipCtx_t *> (srcPtrInfo->_appPtr); +#else ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId); +#endif if (copyEngineCtx != srcCtx) { // Only checks peer list if contexts are different LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData()); - //tprintf(DB_SYNC, "srcCrit lock succeeded\n"); +#if DB_PEER_CTX + std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " srcCtx =" << srcCtx << " peerCnt=" + << ctxCrit->peerCnt() << "\n"; +#endif if (!ctxCrit->isPeerWatcher(copyEngineCtx)) { return false; }; @@ -1807,7 +1828,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, } } else { *forceUnpinnedCopy = true; - 
tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n", + tprintf (DB_COPY, "Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n", *copyDevice ? (*copyDevice)->getDeviceNum() : -1, *copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0); if (HIP_FAIL_SOC & 0x2) { @@ -1822,10 +1843,11 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo) { - tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n", + tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d allocSeqNum=%zu, appAllocationFlags=%x, appPtr=%p\n", tag, ptr, ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes, - ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged); + ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged, + ptrInfo._allocSeqNum, ptrInfo._appAllocationFlags, ptrInfo._appPtr); } @@ -1873,12 +1895,14 @@ void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeByte }; -bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes) +bool getTailoredPtrInfo(const char *tag, hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes) { bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS); + printPointerInfo(DB_COPY, tag, ptr, *ptrInfo); if (tracked) { tailorPtrInfo(ptrInfo, ptr, sizeBytes); + printPointerInfo(DB_COPY, " mod", ptr, *ptrInfo); } return tracked; @@ -1908,8 +1932,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); #endif - bool 
dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes); - bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes); + bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes); + bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes); // Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here: @@ -2036,21 +2060,18 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); #endif - bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes); - bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes); + tprintf (DB_COPY, "copyASync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes); + bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes); + bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes); hc::hcCommandKind hcCopyDir; ihipCtx_t *copyDevice; bool forceUnpinnedCopy; resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &copyDevice, &forceUnpinnedCopy); - tprintf (DB_COPY, "copyASync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n", + tprintf (DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n", copyDevice ? copyDevice->getDeviceNum():-1, - dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem, - src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem, - sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); - printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo); - printPointerInfo(DB_COPY, " src", src, srcPtrInfo); + hcMemcpyStr(hcCopyDir), forceUnpinnedCopy); // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. 
diff --git a/hipamd/src/hip_hcc_internal.h b/hipamd/src/hip_hcc_internal.h index 197cd35bfa..e65dedabd8 100644 --- a/hipamd/src/hip_hcc_internal.h +++ b/hipamd/src/hip_hcc_internal.h @@ -32,10 +32,19 @@ THE SOFTWARE. #include "env.h" -#if defined(__HCC__) && (__hcc_workweek__ < 16354) +#if (__hcc_workweek__ < 16354) #error("This version of HIP requires a newer version of HCC."); #endif +// Use the __appPtr field in the am memtracker to store the context. +// Requires a bug fix in HCC +#if defined(__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE) and (__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE != 0) +#define USE_APP_PTR_FOR_CTX 1 +#endif + + + + #define USE_IPC 1 //--- diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index a8324c5729..5a4b5f4b4e 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -61,7 +61,11 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags) auto device = ctx->getWriteableDevice(); +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx); +#else hc::am_memtracker_update(ptr, device->_deviceId, hipFlags); +#endif if (shareWithAll) { hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr); @@ -660,7 +664,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) vecAcc.push_back(ihipGetDevice(i)->_acc); } am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx); +#else hc::am_memtracker_update(hostPtr, device->_deviceId, flags); +#endif tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size()); if(am_status == AM_SUCCESS){