From 8111fd3b8bd9a46c372ca709e92bb160c2aeddf4 Mon Sep 17 00:00:00 2001 From: Sarunya Pumma Date: Wed, 25 Jul 2018 14:16:32 -0700 Subject: [PATCH] Remove device mapping from shareWithAll memory When shareWithAll memory (e.g., host memory) is allocated, set appId in hc::AmPointerInfo to -1 to indicate that this memory is not mapped to any device. Peer checking in ihipStream_t::canSeeMemory is not necessary if memory is shared with all devices. Thus, it is skipped. Note that earlier host memory is always mapped to device 0 and HIP always performs peer checking for all kinds of hipMemcpy. Since the peer checking process requires context locking, hipMemcpy from/to host memory always grabs device 0's context lock. Therefore, if there is another thread holding the context lock of device 0 (e.g., hipDeviceSynchronize on device 0), hipMemcpy will have to wait for the lock until it can actually perform memcpy. This can significantly deteriorate execution performance. Signed-off-by: Sarunya Pumma --- src/hip_hcc.cpp | 4 ++-- src/hip_memory.cpp | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index c23bdf6dad..aecf75b717 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1808,7 +1808,7 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t* copyEngineCtx, const hc::AmPoin // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: if (dstPtrInfo->_sizeBytes == 0) { return false; - } else { + } else if (dstPtrInfo->_appId != -1) { #if USE_APP_PTR_FOR_CTX ihipCtx_t* dstCtx = static_cast(dstPtrInfo->_appPtr); #else @@ -1831,7 +1831,7 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t* copyEngineCtx, const hc::AmPoin // TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here: if (srcPtrInfo->_sizeBytes == 0) { return false; - } else { + } else if (srcPtrInfo->_appId != -1) { #if USE_APP_PTR_FOR_CTX ihipCtx_t* srcCtx = static_cast(srcPtrInfo->_appPtr); #else diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index d6c04ae98c..dc5390f014 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -61,19 +61,20 @@ int sharePtr(void* ptr, ihipCtx_t* ctx, bool shareWithAll, unsigned hipFlags) { auto device = ctx->getWriteableDevice(); -#if USE_APP_PTR_FOR_CTX - hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx); -#else - hc::am_memtracker_update(ptr, device->_deviceId, hipFlags); -#endif - if (shareWithAll) { + // shareWithAll memory is not mapped to any device + hc::am_memtracker_update(ptr, -1, hipFlags); hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt + 1, g_allAgents, NULL, ptr); tprintf(DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt); if (s != HSA_STATUS_SUCCESS) { ret = -1; } } else { +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx); +#else + hc::am_memtracker_update(ptr, device->_deviceId, hipFlags); +#endif int peerCnt = 0; { LockedAccessor_CtxCrit_t crit(ctx->criticalData());