From db3f4889ca86b7a7d17bfffa9038d740ea2af100 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 19 Jan 2017 23:22:06 -0600 Subject: [PATCH] Add HIP_SYNC_HOST_ALLOC, HipReadEnv --- hipamd/src/hip_hcc.cpp | 37 +++++++++++++++++++++++++------------ hipamd/src/hip_hcc.h | 2 +- hipamd/src/hip_memory.cpp | 9 +++++++++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 544fdc110d..6360097557 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -92,6 +92,10 @@ int HIP_FORCE_SYNC_COPY = 0; int HIP_COHERENT_HOST_ALLOC = 0; +// TODO - set to 0 once we resolve stability. +// When non-zero, hipHostMalloc calls hipDeviceSynchronize() before and after the host allocation (overridable via the HIP_SYNC_HOST_ALLOC env var). +int HIP_SYNC_HOST_ALLOC = 1; + @@ -1270,19 +1274,8 @@ std::string HIP_VISIBLE_DEVICES_callback(void *var_ptr, const char *envVarString } -//--- -//Function called one-time at initialization time to construct a table of all GPU devices. -//HIP/CUDA uses integer "deviceIds" - these are indexes into this table. -//AMP maintains a table of accelerators, but some are emulated - ie for debug or CPU. -//This function creates a vector with only the GPU accelerators. -//It is called with C++11 call_once, which provided thread-safety. -void ihipInit() +void HipReadEnv() { - -#if COMPILE_HIP_ATP_MARKER - amdtInitializeActivityLogger(); - amdtScopedMarker("ihipInit", "HIP", NULL); -#endif /* * Environment variables */ @@ -1324,6 +1317,8 @@ void ihipInit() READ_ENV_I(release, HIP_FORCE_P2P_HOST, 0, "Force use of host/staging copy for peer-to-peer copies.1=always use copies, 2=always return false for hipDeviceCanAccessPeer"); READ_ENV_I(release, HIP_FORCE_SYNC_COPY, 0, "Force all copies (even hipMemcpyAsync) to use sync copies"); + READ_ENV_I(release, HIP_SYNC_HOST_ALLOC, 0, "Sync before and after all host memory allocations. May help stability"); + // TODO - review, can we remove this? 
 READ_ENV_I(release, HIP_NUM_KERNELS_INFLIGHT, 128, "Max number of inflight kernels per stream before active synchronization is forced."); @@ -1375,9 +1370,27 @@ void ihipInit() parseTrigger(HIP_DB_START_API, g_dbStartTriggers); parseTrigger(HIP_DB_STOP_API, g_dbStopTriggers); +}; +//--- +//Function called one-time at initialization time to construct a table of all GPU devices. +//HIP/CUDA uses integer "deviceIds" - these are indexes into this table. +//AMP maintains a table of accelerators, but some are emulated - i.e. for debug or CPU. +//This function creates a vector with only the GPU accelerators. +//It is called with C++11 call_once, which provides thread-safety. +void ihipInit() +{ + +#if COMPILE_HIP_ATP_MARKER + amdtInitializeActivityLogger(); + amdtScopedMarker("ihipInit", "HIP", NULL); +#endif + + + HipReadEnv(); + /* * Build a table of valid compute devices. diff --git a/hipamd/src/hip_hcc.h b/hipamd/src/hip_hcc.h index 031c92fca7..f59019137b 100644 --- a/hipamd/src/hip_hcc.h +++ b/hipamd/src/hip_hcc.h @@ -62,7 +62,7 @@ extern int HIP_COHERENT_HOST_ALLOC; //--- // Chicken bits for disabling functionality to work around potential issues: -extern int HIP_DISABLE_HW_KERNEL_DEP; +extern int HIP_SYNC_HOST_ALLOC; // Class to assign a short TID to each new thread, for HIP debugging purposes. 
diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index c43b6991c6..3c727d34fc 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -164,6 +164,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; + if (HIP_SYNC_HOST_ALLOC) { + hipDeviceSynchronize(); + } + auto ctx = ihipGetTlsDefaultCtx(); if (sizeBytes == 0) { @@ -216,6 +220,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) } } } + if (HIP_SYNC_HOST_ALLOC) { + hipDeviceSynchronize(); + } return ihipLogStatus(hip_status); } @@ -993,6 +1000,8 @@ hipError_t hipHostFree(void* ptr) return ihipLogStatus(hipStatus); }; + +// Deprecated: hipError_t hipFreeHost(void* ptr) { return hipHostFree(ptr);