2
0

Remove device mapping from shareWithAll memory

When shareWithAll memory (e.g., host memory) is allocated, set appId
in hc::AmPointerInfo to -1 to indicate that this memory is not mapped
to any device.  Peer checking in ihipStream_t::canSeeMemory is not
necessary if memory is shared with all devices.  Thus, it is skipped.

Note that earlier host memory is always mapped to device 0 and HIP
always performs peer checking for all kinds of hipMemcpy.  Since the
peer checking process requires context locking, hipMemcpy from/to host
memory always grabs device 0's context lock.  Therefore, if there is
another thread holding the context lock of device 0 (e.g.,
hipDeviceSynchronize on device 0), hipMemcpy will have to wait for the
lock until it can actually perform memcpy.  This can significantly
deteriorate execution performance.

Signed-off-by: Sarunya Pumma <sarunya.pumma@amd.com>
Este cometimento está contido em:
Sarunya Pumma
2018-07-25 14:16:32 -07:00
cometido por Sarunya Pumma
ascendente 4e3a75c019
cometimento 8111fd3b8b
2 ficheiros modificados com 9 adições e 8 eliminações
+2 -2
Ver ficheiro
@@ -1808,7 +1808,7 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t* copyEngineCtx, const hc::AmPoin
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
if (dstPtrInfo->_sizeBytes == 0) {
return false;
} else {
} else if (dstPtrInfo->_appId != -1) {
#if USE_APP_PTR_FOR_CTX
ihipCtx_t* dstCtx = static_cast<ihipCtx_t*>(dstPtrInfo->_appPtr);
#else
@@ -1831,7 +1831,7 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t* copyEngineCtx, const hc::AmPoin
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
if (srcPtrInfo->_sizeBytes == 0) {
return false;
} else {
} else if (srcPtrInfo->_appId != -1) {
#if USE_APP_PTR_FOR_CTX
ihipCtx_t* srcCtx = static_cast<ihipCtx_t*>(srcPtrInfo->_appPtr);
#else
+7 -6
Ver ficheiro
@@ -61,19 +61,20 @@ int sharePtr(void* ptr, ihipCtx_t* ctx, bool shareWithAll, unsigned hipFlags) {
auto device = ctx->getWriteableDevice();
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
#else
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
#endif
if (shareWithAll) {
// shareWithAll memory is not mapped to any device
hc::am_memtracker_update(ptr, -1, hipFlags);
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt + 1, g_allAgents, NULL, ptr);
tprintf(DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
if (s != HSA_STATUS_SUCCESS) {
ret = -1;
}
} else {
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
#else
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
#endif
int peerCnt = 0;
{
LockedAccessor_CtxCrit_t crit(ctx->criticalData());