Improve Peer support and testing.
Change-Id: Icadc65988aaf145a265587ab0357c5bf4d26f3eb
[ROCm/hip commit: d728819d17]
Dieser Commit ist enthalten in:
@@ -1731,26 +1731,35 @@ void ihipSetTs(hipEvent_t e)
|
||||
// Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx.
|
||||
// The peer-list for a context controls which contexts have access to the memory allocated on that context.
|
||||
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
|
||||
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx)
|
||||
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
|
||||
{
|
||||
tprintf (DB_COPY, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
|
||||
thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str());
|
||||
|
||||
// Use blocks to control scope of critical sections.
|
||||
{
|
||||
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
||||
tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(thisCtx)) {
|
||||
return false;
|
||||
};
|
||||
if (dstPtrInfo->_appId != -1) {
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
||||
if (thisCtx != dstCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(thisCtx)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
||||
tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(thisCtx)) {
|
||||
return false;
|
||||
};
|
||||
|
||||
if (srcPtrInfo->_appId != -1) {
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
||||
if (thisCtx != srcCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
||||
if (!ctxCrit->isPeerWatcher(thisCtx)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -1804,7 +1813,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDev
|
||||
|
||||
// hipMemKind must be "resolved" to a specific direction - cannot be default.
|
||||
void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine)
|
||||
hc::hcCommandKind *hcCopyDir, bool *forceP2PCopyEngine)
|
||||
{
|
||||
ihipCtx_t *ctx = this->getCtx();
|
||||
|
||||
@@ -1823,21 +1832,19 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPoi
|
||||
|
||||
// If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
|
||||
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
|
||||
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceHostCopyEngine=true).
|
||||
*forceHostCopyEngine = false;
|
||||
if (*hcCopyDir == hc::hcMemcpyDeviceToDevice) {
|
||||
if (!canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo->_appId), ihipGetPrimaryCtx(srcPtrInfo->_appId))) {
|
||||
*forceHostCopyEngine = true;
|
||||
tprintf (DB_COPY, "P2P D2D : copy engine cannot see both host and device pointers - forcing copy through staging buffers.\n");
|
||||
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceP2PCopyEngine=true).
|
||||
*forceP2PCopyEngine = false;
|
||||
if (!canSeePeerMemory(ctx, dstPtrInfo, srcPtrInfo)) {
|
||||
*forceP2PCopyEngine = true;
|
||||
tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", ctx->getDeviceNum());
|
||||
} else {
|
||||
if (HIP_FORCE_P2P_HOST ) {
|
||||
*forceP2PCopyEngine = true;
|
||||
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", ctx->getDeviceNum());
|
||||
} else {
|
||||
if (HIP_FORCE_P2P_HOST ) {
|
||||
*forceHostCopyEngine = true;
|
||||
tprintf (DB_COPY, "P2P D2D. Copy engine can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n");
|
||||
} else {
|
||||
tprintf (DB_COPY, "P2P D2D. Copy engine can see src and dst, Will use SDMA engine on streamDevice=%s.\n", ctx->toString().c_str());
|
||||
}
|
||||
tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", ctx->getDeviceNum());
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1859,8 +1866,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
bool forceHostCopyEngine;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceHostCopyEngine);
|
||||
bool forceP2PCopyEngine;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine);
|
||||
|
||||
|
||||
|
||||
@@ -1868,15 +1875,15 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
LockedAccessor_StreamCrit_t crit (_criticalData);
|
||||
#if DISABLE_COPY_EXT
|
||||
#warning ("Disabled copy_ext path, P2P host staging copies will not work")
|
||||
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d. Call HCC copy\n",
|
||||
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine);
|
||||
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy\n",
|
||||
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
|
||||
// Note - peer-to-peer copies which require host staging will not work in this path.
|
||||
crit->_av.copy(src, dst, sizeBytes);
|
||||
#else
|
||||
// If srcTracked == dstTracked =1 and forceHostCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow)
|
||||
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d. Call HCC copy_ext.\n",
|
||||
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine);
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
|
||||
// If srcTracked == dstTracked =1 and forceP2PCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow)
|
||||
tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy_ext.\n",
|
||||
ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -1914,17 +1921,17 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
bool forceHostCopyEngine;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceHostCopyEngine);
|
||||
bool forceP2PCopyEngine;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine);
|
||||
|
||||
|
||||
tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d\n",
|
||||
dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine);
|
||||
tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d\n",
|
||||
dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine);
|
||||
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
if (dstTracked && srcTracked && !forceHostCopyEngine) {
|
||||
if (dstTracked && srcTracked && !forceP2PCopyEngine) {
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
// Perform asynchronous copy:
|
||||
@@ -1947,7 +1954,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
// Note - peer-to-peer copies which require host staging will not work in this path.
|
||||
crit->_av.copy(src, dst, sizeBytes);
|
||||
#else
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
|
||||
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -493,7 +493,7 @@ private:
|
||||
void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
|
||||
hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine);
|
||||
|
||||
bool canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx);
|
||||
bool canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
|
||||
|
||||
|
||||
private: // Data
|
||||
|
||||
@@ -53,7 +53,9 @@ void help(char *argv[])
|
||||
static hipError_t myHipMemcpy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream, bool async)
|
||||
{
|
||||
if (async) {
|
||||
return hipMemcpyAsync(dest, src, sizeBytes, kind, stream);
|
||||
hipError_t e = hipMemcpyAsync(dest, src, sizeBytes, kind, stream);
|
||||
//HIPCHECK(hipStreamSynchronize(stream));
|
||||
return (e);
|
||||
} else {
|
||||
return hipMemcpy(dest, src, sizeBytes, kind);
|
||||
};
|
||||
@@ -84,6 +86,19 @@ void parseMyArguments(int argc, char *argv[])
|
||||
};
|
||||
};
|
||||
|
||||
void syncBothDevices()
|
||||
{
|
||||
int saveDevice;
|
||||
HIPCHECK(hipGetDevice(&saveDevice));
|
||||
HIPCHECK(hipSetDevice(g_currentDevice));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HIPCHECK(hipSetDevice(g_peerDevice));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HIPCHECK(hipSetDevice(saveDevice));
|
||||
}
|
||||
|
||||
|
||||
// Sets globals g_currentDevice, g_peerDevice
|
||||
void setupPeerTests()
|
||||
@@ -162,6 +177,9 @@ void enablePeerFirst(bool useAsyncCopy)
|
||||
// Copy data back to host:
|
||||
HIPCHECK (hipSetDevice(g_peerDevice));
|
||||
HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HIPCHECK (hipSetDevice(g_currentDevice));
|
||||
|
||||
// Check host data:
|
||||
for (int i=0; i<N; i++) {
|
||||
@@ -226,6 +244,9 @@ void allocMemoryFirst(bool useAsyncCopy)
|
||||
// Copy data back to host:
|
||||
HIPCHECK (hipSetDevice(g_peerDevice));
|
||||
HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HIPCHECK (hipSetDevice(g_currentDevice));
|
||||
|
||||
|
||||
//---
|
||||
@@ -281,6 +302,10 @@ void testPeerHostToDevice(bool useAsyncCopy)
|
||||
HIPCHECK (hipMalloc(&A_d1, Nbytes) );
|
||||
HIPCHECK (hipMemset(A_d1, 0x13, Nbytes) );
|
||||
|
||||
bool firstAsyncCopy = useAsyncCopy; /*TODO - should be useAsyncCopy*/
|
||||
|
||||
syncBothDevices();
|
||||
|
||||
|
||||
|
||||
// Device0 push to device1, using P2P:
|
||||
@@ -291,18 +316,24 @@ void testPeerHostToDevice(bool useAsyncCopy)
|
||||
// if p_mirrorPeers = 1, this is accelerated copy over PCIe.
|
||||
// if p_mirrorPeers = 0, this should fall back to host (because peer can't see A_d0)
|
||||
HIPCHECK (hipSetDevice(g_peerDevice));
|
||||
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, useAsyncCopy)); // This is P2P copy.
|
||||
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, firstAsyncCopy)); // This is P2P copy.
|
||||
} else {
|
||||
// p_memcpyWithPeer=0 case is HostToDevice.
|
||||
// if p_mirrorPeers = 1, this is accelerated copy over PCIe.
|
||||
// if p_mirrorPeers = 0, this should fall back to host (because device0 can't see A_d1)
|
||||
HIPCHECK (hipSetDevice(g_currentDevice));
|
||||
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, useAsyncCopy)); // This is P2P copy.
|
||||
HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyHostToDevice, 0/*stream*/, firstAsyncCopy)); // This is P2P copy.
|
||||
}
|
||||
|
||||
syncBothDevices();
|
||||
|
||||
// Copy data back to host:
|
||||
HIPCHECK (hipSetDevice(g_peerDevice));
|
||||
HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HIPCHECK (hipSetDevice(g_currentDevice));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
// Check host data:
|
||||
for (int i=0; i<N; i++) {
|
||||
@@ -358,7 +389,9 @@ int main(int argc, char *argv[])
|
||||
parseMyArguments(argc, argv);
|
||||
|
||||
|
||||
testPeerHostToDevice(false/*useAsyncCopy*/);
|
||||
if (p_tests & 0x100) {
|
||||
testPeerHostToDevice(false/*useAsyncCopy*/);
|
||||
}
|
||||
testPeerHostToDevice(true/*useAsyncCopy*/);
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren