diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index d3f87a15c9..278ada0c94 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -1731,26 +1731,35 @@ void ihipSetTs(hipEvent_t e) // Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx. // The peer-list for a context controls which contexts have access to the memory allocated on that context. // So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx. -bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx) +bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo) { - tprintf (DB_COPY, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n", - thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str()); - // Use blocks to control scope of critical sections. - { - LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData()); - tprintf(DB_SYNC, "dstCrit lock succeeded\n"); - if (!ctxCrit->isPeerWatcher(thisCtx)) { - return false; - }; + if (dstPtrInfo->_appId != -1) { + // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: + ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId); + if (thisCtx != dstCtx) { + // Only checks peer list if contexts are different + LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData()); + //tprintf(DB_SYNC, "dstCrit lock succeeded\n"); + if (!ctxCrit->isPeerWatcher(thisCtx)) { + return false; + }; + } } + - { - LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData()); - tprintf(DB_SYNC, "srcCrit lock succeeded\n"); - if (!ctxCrit->isPeerWatcher(thisCtx)) { - return false; - }; + + if (srcPtrInfo->_appId != -1) { + // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: + ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId); + if (thisCtx != srcCtx) { + // Only checks peer list if contexts are different + LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData()); + //tprintf(DB_SYNC, "srcCrit lock succeeded\n"); + if (!ctxCrit->isPeerWatcher(thisCtx)) { + return false; + }; + } } return true; @@ -1804,7 +1813,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDev // hipMemKind must be "resolved" to a specific direction - cannot be default. void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, - hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine) + hc::hcCommandKind *hcCopyDir, bool *forceP2PCopyEngine) { ihipCtx_t *ctx = this->getCtx(); @@ -1823,21 +1832,19 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPoi // If this is P2P access, we need to check to see if the copy agent (specified by the stream where the copy is enqueued) // has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers - // and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceHostCopyEngine=true). - *forceHostCopyEngine = false; - if (*hcCopyDir == hc::hcMemcpyDeviceToDevice) { - if (!canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo->_appId), ihipGetPrimaryCtx(srcPtrInfo->_appId))) { - *forceHostCopyEngine = true; - tprintf (DB_COPY, "P2P D2D : copy engine cannot see both host and device pointers - forcing copy through staging buffers.\n"); + // and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (*forceP2PCopyEngine=true). + *forceP2PCopyEngine = false; + if (!canSeePeerMemory(ctx, dstPtrInfo, srcPtrInfo)) { + *forceP2PCopyEngine = true; + tprintf (DB_COPY, "P2P: copy engine(dev:%d) cannot see both host and device pointers - forcing copy through staging buffers.\n", ctx->getDeviceNum()); + } else { + if (HIP_FORCE_P2P_HOST ) { + *forceP2PCopyEngine = true; + tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n", ctx->getDeviceNum()); } else { - if (HIP_FORCE_P2P_HOST ) { - *forceHostCopyEngine = true; - tprintf (DB_COPY, "P2P D2D. Copy engine can see src and dst but HIP_FORCE_P2P_HOST=0, forcing copy through staging buffers.\n"); - } else { - tprintf (DB_COPY, "P2P D2D. Copy engine can see src and dst, Will use SDMA engine on streamDevice=%s.\n", ctx->toString().c_str()); - } + tprintf (DB_COPY, "P2P. Copy engine (dev:%d) can see src and dst.\n", ctx->getDeviceNum()); } - }; + } } @@ -1859,8 +1866,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, hc::hcCommandKind hcCopyDir; - bool forceHostCopyEngine; - resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceHostCopyEngine); + bool forceP2PCopyEngine; + resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine); @@ -1868,15 +1875,15 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, LockedAccessor_StreamCrit_t crit (_criticalData); #if DISABLE_COPY_EXT #warning ("Disabled copy_ext path, P2P host staging copies will not work") - tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d. Call HCC copy\n", - ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine); + tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy\n", + ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine); // Note - peer-to-peer copies which require host staging will not work in this path. crit->_av.copy(src, dst, sizeBytes); #else - // If srcTracked == dstTracked =1 and forceHostCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow) - tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d. Call HCC copy_ext.\n", - ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine); - crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine); + // If srcTracked == dstTracked =1 and forceP2PCopyEngine=0 then we wil use async SDMA. (assuming HCC implementation doesn't override somehow) + tprintf (DB_COPY, "copySync copyEngine_dev:%d dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d. Call HCC copy_ext.\n", + ctx->getDeviceNum(), dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine); + crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine); #endif } } @@ -1914,17 +1921,17 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes hc::hcCommandKind hcCopyDir; - bool forceHostCopyEngine; - resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceHostCopyEngine); + bool forceP2PCopyEngine; + resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, &forceP2PCopyEngine); - tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceHostCopyEngine=%d\n", - dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceHostCopyEngine); + tprintf (DB_COPY, "copyAsync dst=%p(home_dev:%d) src=%p(home_dev:%d) sz=%zu dstTracked=%d srcTracked=%d dir=%s forceP2PCopyEngine=%d\n", + dst, dstPtrInfo._appId, src, srcPtrInfo._appId, sizeBytes, dstTracked, srcTracked, hcMemcpyStr(hcCopyDir), forceP2PCopyEngine); // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. - if (dstTracked && srcTracked && !forceHostCopyEngine) { + if (dstTracked && srcTracked && !forceP2PCopyEngine) { LockedAccessor_StreamCrit_t crit(_criticalData); // Perform asynchronous copy: @@ -1947,7 +1954,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes // Note - peer-to-peer copies which require host staging will not work in this path. crit->_av.copy(src, dst, sizeBytes); #else - crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine); + crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceP2PCopyEngine); #endif } } diff --git a/projects/hip/src/hip_hcc.h b/projects/hip/src/hip_hcc.h index 30512a9bd2..4ebf002a58 100644 --- a/projects/hip/src/hip_hcc.h +++ b/projects/hip/src/hip_hcc.h @@ -493,7 +493,7 @@ private: void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine); - bool canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx); + bool canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo); private: // Data diff --git a/projects/hip/tests/src/hipPeerToPeer_simple.cpp b/projects/hip/tests/src/hipPeerToPeer_simple.cpp index c2fc2e065c..a0bf6abac1 100644 --- a/projects/hip/tests/src/hipPeerToPeer_simple.cpp +++ b/projects/hip/tests/src/hipPeerToPeer_simple.cpp @@ -53,7 +53,9 @@ void help(char *argv[]) static hipError_t myHipMemcpy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream, bool async) { if (async) { - return hipMemcpyAsync(dest, src, sizeBytes, kind, stream); + hipError_t e = hipMemcpyAsync(dest, src, sizeBytes, kind, stream); + //HIPCHECK(hipStreamSynchronize(stream)); + return (e); } else { return hipMemcpy(dest, src, sizeBytes, kind); }; @@ -84,6 +86,19 @@ void parseMyArguments(int argc, char *argv[]) }; }; +void syncBothDevices() +{ + int saveDevice; + HIPCHECK(hipGetDevice(&saveDevice)); + HIPCHECK(hipSetDevice(g_currentDevice)); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK(hipSetDevice(g_peerDevice)); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK(hipSetDevice(saveDevice)); +} + // Sets globals g_currentDevice, g_peerDevice void setupPeerTests() @@ -162,6 +177,9 @@ void enablePeerFirst(bool useAsyncCopy) // Copy data back to host: HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy)); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK (hipSetDevice(g_currentDevice)); // Check host data: for (int i=0; i