Add HIP_SYNC_HOST_ALLOC, HipReadEnv

[ROCm/clr commit: db3f4889ca]
Этот коммит содержится в:
Ben Sander
2017-01-19 23:22:06 -06:00
родитель 7a992b9fc3
Коммит 81488d5d00
3 изменённых файлов: 35 добавлений и 13 удалений
+25 -12
Просмотреть файл
@@ -92,6 +92,10 @@ int HIP_FORCE_SYNC_COPY = 0;
int HIP_COHERENT_HOST_ALLOC = 0;
// TODO - set to 0 once the stability issues are resolved.
// HIP_SYNC_HOST_ALLOC: when non-zero, hipHostMalloc synchronizes the device before and after the allocation.
int HIP_SYNC_HOST_ALLOC = 1;
@@ -1270,19 +1274,8 @@ std::string HIP_VISIBLE_DEVICES_callback(void *var_ptr, const char *envVarString
}
//---
//Function called one time at initialization to construct a table of all GPU devices.
//HIP/CUDA uses integer "deviceIds" - these are indexes into this table.
//AMP maintains a table of accelerators, but some are emulated - i.e. for debug or CPU.
//This function creates a vector with only the GPU accelerators.
//It is called with C++11 call_once, which provides thread-safety.
void ihipInit()
void HipReadEnv()
{
#if COMPILE_HIP_ATP_MARKER
amdtInitializeActivityLogger();
amdtScopedMarker("ihipInit", "HIP", NULL);
#endif
/*
* Environment variables
*/
@@ -1324,6 +1317,8 @@ void ihipInit()
READ_ENV_I(release, HIP_FORCE_P2P_HOST, 0, "Force use of host/staging copy for peer-to-peer copies.1=always use copies, 2=always return false for hipDeviceCanAccessPeer");
READ_ENV_I(release, HIP_FORCE_SYNC_COPY, 0, "Force all copies (even hipMemcpyAsync) to use sync copies");
READ_ENV_I(release, HIP_SYNC_HOST_ALLOC, 0, "Sync before and after all host memory allocations. May help stability");
// TODO - review, can we remove this?
READ_ENV_I(release, HIP_NUM_KERNELS_INFLIGHT, 128, "Max number of inflight kernels per stream before active synchronization is forced.");
@@ -1375,9 +1370,27 @@ void ihipInit()
parseTrigger(HIP_DB_START_API, g_dbStartTriggers);
parseTrigger(HIP_DB_STOP_API, g_dbStopTriggers);
};
//---
//Function called one time at initialization to construct a table of all GPU devices.
//HIP/CUDA uses integer "deviceIds" - these are indexes into this table.
//AMP maintains a table of accelerators, but some are emulated - i.e. for debug or CPU.
//This function creates a vector with only the GPU accelerators.
//It is called with C++11 call_once, which provides thread-safety.
void ihipInit()
{
#if COMPILE_HIP_ATP_MARKER
amdtInitializeActivityLogger();
amdtScopedMarker("ihipInit", "HIP", NULL);
#endif
HipReadEnv();
/*
* Build a table of valid compute devices.
+1 -1
Просмотреть файл
@@ -62,7 +62,7 @@ extern int HIP_COHERENT_HOST_ALLOC;
//---
// Chicken bits for disabling functionality to work around potential issues:
extern int HIP_DISABLE_HW_KERNEL_DEP;
extern int HIP_SYNC_HOST_ALLOC;
// Class to assign a short TID to each new thread, for HIP debugging purposes.
+9
Просмотреть файл
@@ -164,6 +164,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
if (HIP_SYNC_HOST_ALLOC) {
hipDeviceSynchronize();
}
auto ctx = ihipGetTlsDefaultCtx();
if (sizeBytes == 0) {
@@ -216,6 +220,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
}
}
}
if (HIP_SYNC_HOST_ALLOC) {
hipDeviceSynchronize();
}
return ihipLogStatus(hip_status);
}
@@ -993,6 +1000,8 @@ hipError_t hipHostFree(void* ptr)
return ihipLogStatus(hipStatus);
};
// Deprecated:
hipError_t hipFreeHost(void* ptr)
{
return hipHostFree(ptr);