diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 2955db5e37..ac285e6a83 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -92,6 +92,7 @@ int HIP_EVENT_SYS_RELEASE = 0; int HIP_HOST_COHERENT = 1; int HIP_SYNC_HOST_ALLOC = 1; +int HIP_SYNC_FREE = 0; int HIP_INIT_ALLOC = -1; @@ -1279,6 +1280,8 @@ void HipReadEnv() { READ_ENV_I(release, HIP_SYNC_STREAM_WAIT, 0, "hipStreamWaitEvent will synchronize to host"); + READ_ENV_I(release, HIP_SYNC_FREE, 0, + "Force all calls to hipFree to sync all devices and all streams"); READ_ENV_I(release, HIP_HOST_COHERENT, 0, "If set, all host memory will be allocated as fine-grained system memory. This " diff --git a/projects/clr/hipamd/src/hip_hcc_internal.h b/projects/clr/hipamd/src/hip_hcc_internal.h index 0e0db0c628..b40fac93a5 100644 --- a/projects/clr/hipamd/src/hip_hcc_internal.h +++ b/projects/clr/hipamd/src/hip_hcc_internal.h @@ -83,6 +83,8 @@ extern int HIP_SYNC_NULL_STREAM; extern int HIP_INIT_ALLOC; extern int HIP_FORCE_NULL_STREAM; +extern int HIP_SYNC_FREE; + extern int HIP_DUMP_CODE_OBJECT; // TODO - remove when this is standard behavior. diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 8196be91ff..b6a1164a4a 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -1902,10 +1902,6 @@ hipError_t hipFree(void* ptr) { hipError_t hipStatus = hipErrorInvalidDevicePointer; - // Synchronize to ensure all work has finished. - ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits - // for all activity to finish. 
- if (ptr) { hc::accelerator acc; #if (__hcc_workweek__ >= 17332) @@ -1916,6 +1912,29 @@ hipError_t hipFree(void* ptr) { am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); if (status == AM_SUCCESS) { if (amPointerInfo._hostPointer == NULL) { + if (HIP_SYNC_FREE) { + // Synchronize all devices, all streams + // to ensure all work has finished on all devices. + // This is disabled by default. + for (unsigned i = 0; i < g_deviceCnt; i++) { + ihipGetPrimaryCtx(i)->locked_waitAllStreams(); + } + } + else { + ihipCtx_t* ctx; + if (amPointerInfo._appId != -1) { +#if USE_APP_PTR_FOR_CTX + ctx = static_cast<ihipCtx_t*>(amPointerInfo._appPtr); +#else + ctx = ihipGetPrimaryCtx(amPointerInfo._appId); +#endif + } else { + ctx = ihipGetTlsDefaultCtx(); + } + // Synchronize to ensure all work has finished on device owning the memory. + ctx->locked_waitAllStreams(); // ignores non-blocking streams, this waits + // for all activity to finish. + } hc::am_free(ptr); hipStatus = hipSuccess; }