Improve Peer support and testing.

Change-Id: Icadc65988aaf145a265587ab0357c5bf4d26f3eb


[ROCm/hip commit: d728819d17]
Dieser Commit ist enthalten in:
Ben Sander
2016-11-06 03:22:36 -06:00
Ursprung 2a9aa89491
Commit 56cd9ce82f
3 geänderte Dateien mit 89 neuen und 49 gelöschten Zeilen
+51 -44
Datei anzeigen
@@ -1731,26 +1731,35 @@ void ihipSetTs(hipEvent_t e)
// Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx.
// The peer-list for a context controls which contexts have access to the memory allocated on that context.
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx)
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
{
tprintf (DB_COPY, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str());
// Use blocks to control scope of critical sections.
{
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
tprintf(DB_SYNC, "dstCrit lock succeeded\n");
if (!ctxCrit->isPeerWatcher(thisCtx)) {
return false;
};
if (dstPtrInfo->_appId != -1) {
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
if (thisCtx != dstCtx) {
// Only checks peer list if contexts are different
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
if (!ctxCrit->isPeerWatcher(thisCtx)) {
return false;
};
}
}
{
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
tprintf(DB_SYNC, "srcCrit lock succeeded\n");
if (!ctxCrit->isPeerWatcher(thisCtx)) {
return false;
};
if (srcPtrInfo->_appId != -1) {
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
if (thisCtx != srcCtx) {
// Only checks peer list if contexts are different
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
if (!ctxCrit->isPeerWatcher(thisCtx)) {
return false;
};
}
}
return true;
@@ -1804,7 +1813,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDev
// hipMemKind must be "resolved" to a specific direction - cannot be default.
void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine)
hc::hcCommandKind *hcCopyDir, bool *forceP2PCopyEngine)
{
ihipCtx_t *ctx = this->getCtx();
@@ -1823,21 +1832,19 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPoi
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceHostCopyEngine=true).
*forceHostCopyEngine = false;
if (*hcCopyDir == hc::hcMemcpyDeviceToDevice) {
if (!canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo->_appId), ihipGetPrimaryCtx(srcPtrInfo->_appId))) {
*forceHostCopyEngine = true;
tprintf (DB_COPY, "P2P D2D : copy engine cannot see both host and device pointers - forcing copy through staging buffers.\n");
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceP2PCopyEngine=true).
*forceP2PCopyEngine = false;
if (!canSeePeerMemory(ctx, dstPtrInfo, srcPtrInfo)) {
*forceP2PCopyEngine = true;
tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", ctx->getDeviceNum());
} else {
if (HIP_FORCE_P2P_HOST ) {
*forceP2PCopyEngine = true;
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", ctx->getDeviceNum());
} else {
if (HIP_FORCE_P2P_HOST ) {
*forceHostCopyEngine = true;
tprintf (DB_COPY, "P2P D2D. Copy engine can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n");
} else {
tprintf (DB_COPY, "P2P D2D. Copy engine can see src and dst, Will use SDMA engine on streamDevice=%s.\n", ctx->toString().c_str());
}
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", ctx->getDeviceNum());
}
};
}
}
@@ -1859,8 +1866,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
hc::hcCommandKind hcCopyDir;
bool forceHostCopyEngine;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceHostCopyEngine);
bool forceP2PCopyEngine;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine);
@@ -1868,15 +1875,15 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
LockedAccessor_StreamCrit_t crit (_criticalData);
#if DISABLE_COPY_EXT
#warning ("Disabled copy_ext path, P2P host staging copies will not work")
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d. Call HCC copy\n",
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine);
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy\n",
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
// Note - peer-to-peer copies which require host staging will not work in this path.
crit->_av.copy(src, dst, sizeBytes);
#else
// If srcTracked == dstTracked =1 and forceHostCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow)
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d. Call HCC copy_ext.\n",
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine);
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
// If srcTracked == dstTracked =1 and forceP2PCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow)
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy_ext.\n",
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine);
#endif
}
}
@@ -1914,17 +1921,17 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
hc::hcCommandKind hcCopyDir;
bool forceHostCopyEngine;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceHostCopyEngine);
bool forceP2PCopyEngine;
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine);
tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d\n",
dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine);
tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d\n",
dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
if (dstTracked && srcTracked && !forceHostCopyEngine) {
if (dstTracked && srcTracked && !forceP2PCopyEngine) {
LockedAccessor_StreamCrit_t crit(_criticalData);
// Perform asynchronous copy:
@@ -1947,7 +1954,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
// Note - peer-to-peer copies which require host staging will not work in this path.
crit->_av.copy(src, dst, sizeBytes);
#else
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine);
#endif
}
}
+1 -1
Datei anzeigen
@@ -493,7 +493,7 @@ private:
void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine);
bool canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx);
bool canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
private: // Data
@@ -53,7 +53,9 @@ void help(char *argv[])
static hipError_t myHipMemcpy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream, bool async)
{
if (async) {
return hipMemcpyAsync(dest, src, sizeBytes, kind, stream);
hipError_t e = hipMemcpyAsync(dest, src, sizeBytes, kind, stream);
//HIPCHECK(hipStreamSynchronize(stream));
return (e);
} else {
return hipMemcpy(dest, src, sizeBytes, kind);
};
@@ -84,6 +86,19 @@ void parseMyArguments(int argc, char *argv[])
};
};
void syncBothDevices()
{
int saveDevice;
HIPCHECK(hipGetDevice(&saveDevice));
HIPCHECK(hipSetDevice(g_currentDevice));
HIPCHECK(hipDeviceSynchronize());
HIPCHECK(hipSetDevice(g_peerDevice));
HIPCHECK(hipDeviceSynchronize());
HIPCHECK(hipSetDevice(saveDevice));
}
// Sets globals g_currentDevice, g_peerDevice
void setupPeerTests()
@@ -162,6 +177,9 @@ void enablePeerFirst(bool useAsyncCopy)
// Copy data back to host:
HIPCHECK (hipSetDevice(g_peerDevice));
HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy));
HIPCHECK(hipDeviceSynchronize());
HIPCHECK (hipSetDevice(g_currentDevice));
// Check host data:
for (int i=0; i<N; i++) {
@@ -226,6 +244,9 @@ void allocMemoryFirst(bool useAsyncCopy)
// Copy data back to host:
HIPCHECK (hipSetDevice(g_peerDevice));
HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy));
HIPCHECK(hipDeviceSynchronize());
HIPCHECK (hipSetDevice(g_currentDevice));
//---
@@ -281,6 +302,10 @@ void testPeerHostToDevice(bool useAsyncCopy)
HIPCHECK (hipMalloc(&A_d1, Nbytes) );
HIPCHECK (hipMemset(A_d1, 0x13, Nbytes) );
bool firstAsyncCopy = useAsyncCopy; /*TODO - should be useAsyncCopy*/
syncBothDevices();
// Device0 push to device1, using P2P:
@@ -291,18 +316,24 @@ void testPeerHostToDevice(bool useAsyncCopy)
// if p_mirrorPeers = 1, this is accelerated copy over PCIe.
// if p_mirrorPeers = 0, this should fall back to host (because peer can't see A_d0)
HIPCHECK (hipSetDevice(g_peerDevice));
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, useAsyncCopy)); // This is P2P copy.
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, firstAsyncCopy)); // This is P2P copy.
} else {
// p_memcpyWithPeer=0 case is HostToDevice.
// if p_mirrorPeers = 1, this is accelerated copy over PCIe.
// if p_mirrorPeers = 0, this should fall back to host (because device0 can't see A_d1)
HIPCHECK (hipSetDevice(g_currentDevice));
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, useAsyncCopy)); // This is P2P copy.
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, firstAsyncCopy)); // This is P2P copy.
}
syncBothDevices();
// Copy data back to host:
HIPCHECK (hipSetDevice(g_peerDevice));
HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy));
HIPCHECK(hipDeviceSynchronize());
HIPCHECK (hipSetDevice(g_currentDevice));
HIPCHECK(hipDeviceSynchronize());
// Check host data:
for (int i=0; i<N; i++) {
@@ -358,7 +389,9 @@ int main(int argc, char *argv[])
parseMyArguments(argc, argv);
testPeerHostToDevice(false/*useAsyncCopy*/);
if (p_tests & 0x100) {
testPeerHostToDevice(false/*useAsyncCopy*/);
}
testPeerHostToDevice(true/*useAsyncCopy*/);
if (p_tests & 0x1) {