Merge pull request #237 from bensander/use_ctxptr_for_p2p

Use ctxptr for p2p
This commit is contained in:
Ben Sander
2017-11-01 18:55:25 +01:00
committed by GitHub
commit fe32685fbc
3 muutettua tiedostoa jossa 57 lisäystä ja 19 poistoa
+39 -18
Näytä tiedosto
@@ -47,6 +47,9 @@ THE SOFTWARE.
#include "trace_helper.h"
#include "env.h"
//TODO - create a stream-based debug interface as an additional option for tprintf
#define DB_PEER_CTX 0
//=================================================================================================
//Global variables:
@@ -463,7 +466,9 @@ void ihipCtxCriticalBase_t<CtxMutex>::recomputePeerAgents()
template<>
bool ihipCtxCriticalBase_t<CtxMutex>::isPeerWatcher(const ihipCtx_t *peer)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
auto match = std::find_if(_peers.begin(), _peers.end(),
[=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); });
return (match != std::end(_peers));
}
@@ -1679,6 +1684,9 @@ const char *ihipErrorString(hipError_t hip_error)
// So we check dstCtx's and srcCtx's peerList to see if they both include thisCtx.
bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
{
if (copyEngineCtx == nullptr) {
return false;
}
// Make sure this is a device-to-device copy with all memory available to the requested copy engine
//
@@ -1686,11 +1694,18 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
if (dstPtrInfo->_sizeBytes == 0) {
return false;
} else {
#if USE_APP_PTR_FOR_CTX
ihipCtx_t *dstCtx = static_cast<ihipCtx_t*> (dstPtrInfo->_appPtr);
#else
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
#endif
if (copyEngineCtx != dstCtx) {
// Only checks peer list if contexts are different
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
#if DB_PEER_CTX
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " dstCtx =" << dstCtx << " peerCnt="
<< ctxCrit->peerCnt() << "\n";
#endif
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
return false;
};
@@ -1698,16 +1713,22 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
}
// TODO - pointer-info stores a deviceID not a context, may have some unusual side-effects here:
if (srcPtrInfo->_sizeBytes == 0) {
return false;
} else {
#if USE_APP_PTR_FOR_CTX
ihipCtx_t *srcCtx = static_cast<ihipCtx_t*> (srcPtrInfo->_appPtr);
#else
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
#endif
if (copyEngineCtx != srcCtx) {
// Only checks peer list if contexts are different
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
#if DB_PEER_CTX
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " srcCtx =" << srcCtx << " peerCnt="
<< ctxCrit->peerCnt() << "\n";
#endif
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
return false;
};
@@ -1807,7 +1828,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
}
} else {
*forceUnpinnedCopy = true;
tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
tprintf (DB_COPY, "Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
if (HIP_FAIL_SOC & 0x2) {
@@ -1822,10 +1843,11 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
{
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d allocSeqNum=%zu, appAllocationFlags=%x, appPtr=%p\n",
tag, ptr,
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged,
ptrInfo._allocSeqNum, ptrInfo._appAllocationFlags, ptrInfo._appPtr);
}
@@ -1873,12 +1895,14 @@ void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeByte
};
bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
bool getTailoredPtrInfo(const char *tag, hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
{
bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS);
printPointerInfo(DB_COPY, tag, ptr, *ptrInfo);
if (tracked) {
tailorPtrInfo(ptrInfo, ptr, sizeBytes);
printPointerInfo(DB_COPY, " mod", ptr, *ptrInfo);
}
return tracked;
@@ -1908,8 +1932,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
#endif
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
@@ -2036,21 +2060,18 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
#endif
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
tprintf (DB_COPY, "copyASync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes);
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
hc::hcCommandKind hcCopyDir;
ihipCtx_t *copyDevice;
bool forceUnpinnedCopy;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &copyDevice, &forceUnpinnedCopy);
tprintf (DB_COPY, "copyASync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n",
tprintf (DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n",
copyDevice ? copyDevice->getDeviceNum():-1,
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
+10 -1
Näytä tiedosto
@@ -32,10 +32,19 @@ THE SOFTWARE.
#include "env.h"
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
#if (__hcc_workweek__ < 16354)
#error("This version of HIP requires a newer version of HCC.");
#endif
// Use the _appPtr field in the am memtracker to store the context.
// Requires a bug fix in HCC.
#if defined(__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE) and (__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE != 0)
#define USE_APP_PTR_FOR_CTX 1
#endif
#define USE_IPC 1
//---
+8
Näytä tiedosto
@@ -61,7 +61,11 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags)
auto device = ctx->getWriteableDevice();
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
#else
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
#endif
if (shareWithAll) {
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr);
@@ -660,7 +664,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
vecAcc.push_back(ihipGetDevice(i)->_acc);
}
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx);
#else
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
#endif
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size());
if(am_status == AM_SUCCESS){