Merge pull request #237 from bensander/use_ctxptr_for_p2p
Use ctxptr for p2p
This commit is contained in:
+39
-18
@@ -47,6 +47,9 @@ THE SOFTWARE.
|
||||
#include "trace_helper.h"
|
||||
#include "env.h"
|
||||
|
||||
//TODO - create a stream-based debug interface as an additional option for tprintf
|
||||
#define DB_PEER_CTX 0
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
//Global variables:
|
||||
@@ -463,7 +466,9 @@ void ihipCtxCriticalBase_t<CtxMutex>::recomputePeerAgents()
|
||||
template<>
|
||||
bool ihipCtxCriticalBase_t<CtxMutex>::isPeerWatcher(const ihipCtx_t *peer)
|
||||
{
|
||||
auto match = std::find(_peers.begin(), _peers.end(), peer);
|
||||
auto match = std::find_if(_peers.begin(), _peers.end(),
|
||||
[=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); });
|
||||
|
||||
return (match != std::end(_peers));
|
||||
}
|
||||
|
||||
@@ -1679,6 +1684,9 @@ const char *ihipErrorString(hipError_t hip_error)
|
||||
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
|
||||
bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
|
||||
{
|
||||
if (copyEngineCtx == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make sure this is a device-to-device copy with all memory available to the requested copy engine
|
||||
//
|
||||
@@ -1686,11 +1694,18 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
|
||||
if (dstPtrInfo->_sizeBytes == 0) {
|
||||
return false;
|
||||
} else {
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
ihipCtx_t *dstCtx = static_cast<ihipCtx_t*> (dstPtrInfo->_appPtr);
|
||||
#else
|
||||
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
||||
#endif
|
||||
if (copyEngineCtx != dstCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
||||
#if DB_PEER_CTX
|
||||
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " dstCtx =" << dstCtx << " peerCnt="
|
||||
<< ctxCrit->peerCnt() << "\n";
|
||||
#endif
|
||||
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
||||
return false;
|
||||
};
|
||||
@@ -1698,16 +1713,22 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
|
||||
}
|
||||
|
||||
|
||||
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
if (srcPtrInfo->_sizeBytes == 0) {
|
||||
return false;
|
||||
} else {
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
ihipCtx_t *srcCtx = static_cast<ihipCtx_t*> (srcPtrInfo->_appPtr);
|
||||
#else
|
||||
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
||||
#endif
|
||||
if (copyEngineCtx != srcCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
||||
#if DB_PEER_CTX
|
||||
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " srcCtx =" << srcCtx << " peerCnt="
|
||||
<< ctxCrit->peerCnt() << "\n";
|
||||
#endif
|
||||
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
||||
return false;
|
||||
};
|
||||
@@ -1807,7 +1828,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
}
|
||||
} else {
|
||||
*forceUnpinnedCopy = true;
|
||||
tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
|
||||
tprintf (DB_COPY, "Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
|
||||
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
|
||||
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
|
||||
if (HIP_FAIL_SOC & 0x2) {
|
||||
@@ -1822,10 +1843,11 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
|
||||
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
|
||||
{
|
||||
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
|
||||
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d allocSeqNum=%zu, appAllocationFlags=%x, appPtr=%p\n",
|
||||
tag, ptr,
|
||||
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
|
||||
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
|
||||
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged,
|
||||
ptrInfo._allocSeqNum, ptrInfo._appAllocationFlags, ptrInfo._appPtr);
|
||||
}
|
||||
|
||||
|
||||
@@ -1873,12 +1895,14 @@ void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeByte
|
||||
};
|
||||
|
||||
|
||||
bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
|
||||
bool getTailoredPtrInfo(const char *tag, hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
|
||||
{
|
||||
bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS);
|
||||
printPointerInfo(DB_COPY, tag, ptr, *ptrInfo);
|
||||
|
||||
if (tracked) {
|
||||
tailorPtrInfo(ptrInfo, ptr, sizeBytes);
|
||||
printPointerInfo(DB_COPY, " mod", ptr, *ptrInfo);
|
||||
}
|
||||
|
||||
return tracked;
|
||||
@@ -1908,8 +1932,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
||||
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
||||
|
||||
|
||||
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
|
||||
@@ -2036,21 +2060,18 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
||||
tprintf (DB_COPY, "copyASync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes);
|
||||
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
ihipCtx_t *copyDevice;
|
||||
bool forceUnpinnedCopy;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device, &forceUnpinnedCopy);
|
||||
tprintf (DB_COPY, "copyASync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n",
|
||||
tprintf (DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n",
|
||||
copyDevice ? copyDevice->getDeviceNum():-1,
|
||||
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
|
||||
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
||||
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
||||
hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
|
||||
@@ -32,10 +32,19 @@ THE SOFTWARE.
|
||||
#include "env.h"
|
||||
|
||||
|
||||
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
|
||||
#if (__hcc_workweek__ < 16354)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
// Use the __appPtr field in the am memtracker to store the context.
|
||||
// Requires a bug fix in HCC
|
||||
#if defined(__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE) and (__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE != 0)
|
||||
#define USE_APP_PTR_FOR_CTX 1
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#define USE_IPC 1
|
||||
|
||||
//---
|
||||
|
||||
@@ -61,7 +61,11 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags)
|
||||
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
|
||||
#else
|
||||
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
|
||||
#endif
|
||||
|
||||
if (shareWithAll) {
|
||||
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr);
|
||||
@@ -660,7 +664,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
vecAcc.push_back(ihipGetDevice(i)->_acc);
|
||||
}
|
||||
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx);
|
||||
#else
|
||||
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
|
||||
#endif
|
||||
|
||||
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size());
|
||||
if(am_status == AM_SUCCESS){
|
||||
|
||||
Viittaa uudesa ongelmassa
Block a user