|
|
|
@@ -47,6 +47,9 @@ THE SOFTWARE.
|
|
|
|
|
#include "trace_helper.h"
|
|
|
|
|
#include "env.h"
|
|
|
|
|
|
|
|
|
|
//TODO - create a stream-based debug interface as an additional option for tprintf
|
|
|
|
|
#define DB_PEER_CTX 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//=================================================================================================
|
|
|
|
|
//Global variables:
|
|
|
|
@@ -463,7 +466,9 @@ void ihipCtxCriticalBase_t<CtxMutex>::recomputePeerAgents()
|
|
|
|
|
template<>
|
|
|
|
|
bool ihipCtxCriticalBase_t<CtxMutex>::isPeerWatcher(const ihipCtx_t *peer)
|
|
|
|
|
{
|
|
|
|
|
auto match = std::find(_peers.begin(), _peers.end(), peer);
|
|
|
|
|
auto match = std::find_if(_peers.begin(), _peers.end(),
|
|
|
|
|
[=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); });
|
|
|
|
|
|
|
|
|
|
return (match != std::end(_peers));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -1679,6 +1684,9 @@ const char *ihipErrorString(hipError_t hip_error)
|
|
|
|
|
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
|
|
|
|
|
bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
|
|
|
|
|
{
|
|
|
|
|
if (copyEngineCtx == nullptr) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Make sure this is a device-to-device copy with all memory available to the requested copy engine
|
|
|
|
|
//
|
|
|
|
@@ -1686,11 +1694,18 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
|
|
|
|
|
if (dstPtrInfo->_sizeBytes == 0) {
|
|
|
|
|
return false;
|
|
|
|
|
} else {
|
|
|
|
|
#if USE_APP_PTR_FOR_CTX
|
|
|
|
|
ihipCtx_t *dstCtx = static_cast<ihipCtx_t*> (dstPtrInfo->_appPtr);
|
|
|
|
|
#else
|
|
|
|
|
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
|
|
|
|
#endif
|
|
|
|
|
if (copyEngineCtx != dstCtx) {
|
|
|
|
|
// Only checks peer list if contexts are different
|
|
|
|
|
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
|
|
|
|
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
|
|
|
|
#if DB_PEER_CTX
|
|
|
|
|
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " dstCtx =" << dstCtx << " peerCnt="
|
|
|
|
|
<< ctxCrit->peerCnt() << "\n";
|
|
|
|
|
#endif
|
|
|
|
|
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
|
|
|
|
return false;
|
|
|
|
|
};
|
|
|
|
@@ -1698,16 +1713,22 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
|
|
|
|
if (srcPtrInfo->_sizeBytes == 0) {
|
|
|
|
|
return false;
|
|
|
|
|
} else {
|
|
|
|
|
#if USE_APP_PTR_FOR_CTX
|
|
|
|
|
ihipCtx_t *srcCtx = static_cast<ihipCtx_t*> (srcPtrInfo->_appPtr);
|
|
|
|
|
#else
|
|
|
|
|
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
|
|
|
|
#endif
|
|
|
|
|
if (copyEngineCtx != srcCtx) {
|
|
|
|
|
// Only checks peer list if contexts are different
|
|
|
|
|
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
|
|
|
|
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
|
|
|
|
#if DB_PEER_CTX
|
|
|
|
|
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " srcCtx =" << srcCtx << " peerCnt="
|
|
|
|
|
<< ctxCrit->peerCnt() << "\n";
|
|
|
|
|
#endif
|
|
|
|
|
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
|
|
|
|
return false;
|
|
|
|
|
};
|
|
|
|
@@ -1807,7 +1828,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
*forceUnpinnedCopy = true;
|
|
|
|
|
tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
|
|
|
|
|
tprintf (DB_COPY, "Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
|
|
|
|
|
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
|
|
|
|
|
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
|
|
|
|
|
if (HIP_FAIL_SOC & 0x2) {
|
|
|
|
@@ -1822,10 +1843,11 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
|
|
|
|
|
|
|
|
|
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
|
|
|
|
|
{
|
|
|
|
|
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
|
|
|
|
|
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d allocSeqNum=%zu, appAllocationFlags=%x, appPtr=%p\n",
|
|
|
|
|
tag, ptr,
|
|
|
|
|
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
|
|
|
|
|
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
|
|
|
|
|
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged,
|
|
|
|
|
ptrInfo._allocSeqNum, ptrInfo._appAllocationFlags, ptrInfo._appPtr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1873,12 +1895,14 @@ void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeByte
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
|
|
|
|
|
bool getTailoredPtrInfo(const char *tag, hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
|
|
|
|
|
{
|
|
|
|
|
bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS);
|
|
|
|
|
printPointerInfo(DB_COPY, tag, ptr, *ptrInfo);
|
|
|
|
|
|
|
|
|
|
if (tracked) {
|
|
|
|
|
tailorPtrInfo(ptrInfo, ptr, sizeBytes);
|
|
|
|
|
printPointerInfo(DB_COPY, " mod", ptr, *ptrInfo);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return tracked;
|
|
|
|
@@ -1908,8 +1932,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
|
|
|
|
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
|
|
|
|
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
|
|
|
|
#endif
|
|
|
|
|
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
|
|
|
|
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
|
|
|
|
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
|
|
|
|
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
|
|
|
|
@@ -2036,21 +2060,18 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
|
|
|
|
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
|
|
|
|
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
|
|
|
|
#endif
|
|
|
|
|
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
|
|
|
|
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
|
|
|
|
tprintf (DB_COPY, "copyASync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes);
|
|
|
|
|
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
|
|
|
|
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hc::hcCommandKind hcCopyDir;
|
|
|
|
|
ihipCtx_t *copyDevice;
|
|
|
|
|
bool forceUnpinnedCopy;
|
|
|
|
|
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device, &forceUnpinnedCopy);
|
|
|
|
|
tprintf (DB_COPY, "copyASync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n",
|
|
|
|
|
tprintf (DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n",
|
|
|
|
|
copyDevice ? copyDevice->getDeviceNum():-1,
|
|
|
|
|
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
|
|
|
|
|
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
|
|
|
|
|
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
|
|
|
|
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
|
|
|
|
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
|
|
|
|
hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
|
|
|
|
|
|
|
|
|
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
|
|
|
|
// If both pointers are not tracked, we need to fall back to a sync copy.
|
|
|
|
|