Add HIP_SYNC_HOST_ALLOC, HipReadEnv
Этот коммит содержится в:
@@ -92,6 +92,10 @@ int HIP_FORCE_SYNC_COPY = 0;
|
||||
|
||||
int HIP_COHERENT_HOST_ALLOC = 0;
|
||||
|
||||
// TODO - set to 0 once we resolve stability.
|
||||
// HIP_SYNC_HOST_ALLOC: sync before and after all host memory allocations (may help stability).
|
||||
int HIP_SYNC_HOST_ALLOC = 1;
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1270,19 +1274,8 @@ std::string HIP_VISIBLE_DEVICES_callback(void *var_ptr, const char *envVarString
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
//Function called one-time at initialization time to construct a table of all GPU devices.
|
||||
//HIP/CUDA uses integer "deviceIds" - these are indexes into this table.
|
||||
//AMP maintains a table of accelerators, but some are emulated - i.e. for debug or CPU.
|
||||
//This function creates a vector with only the GPU accelerators.
|
||||
//It is called with C++11 call_once, which provides thread-safety.
|
||||
void ihipInit()
|
||||
void HipReadEnv()
|
||||
{
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER
|
||||
amdtInitializeActivityLogger();
|
||||
amdtScopedMarker("ihipInit", "HIP", NULL);
|
||||
#endif
|
||||
/*
|
||||
* Environment variables
|
||||
*/
|
||||
@@ -1324,6 +1317,8 @@ void ihipInit()
|
||||
READ_ENV_I(release, HIP_FORCE_P2P_HOST, 0, "Force use of host/staging copy for peer-to-peer copies.1=always use copies, 2=always return false for hipDeviceCanAccessPeer");
|
||||
READ_ENV_I(release, HIP_FORCE_SYNC_COPY, 0, "Force all copies (even hipMemcpyAsync) to use sync copies");
|
||||
|
||||
READ_ENV_I(release, HIP_SYNC_HOST_ALLOC, 0, "Sync before and after all host memory allocations. May help stability");
|
||||
|
||||
// TODO - review, can we remove this?
|
||||
READ_ENV_I(release, HIP_NUM_KERNELS_INFLIGHT, 128, "Max number of inflight kernels per stream before active synchronization is forced.");
|
||||
|
||||
@@ -1375,9 +1370,27 @@ void ihipInit()
|
||||
|
||||
parseTrigger(HIP_DB_START_API, g_dbStartTriggers);
|
||||
parseTrigger(HIP_DB_STOP_API, g_dbStopTriggers);
|
||||
};
|
||||
|
||||
|
||||
|
||||
//---
|
||||
//Function called one-time at initialization time to construct a table of all GPU devices.
|
||||
//HIP/CUDA uses integer "deviceIds" - these are indexes into this table.
|
||||
//AMP maintains a table of accelerators, but some are emulated - i.e. for debug or CPU.
|
||||
//This function creates a vector with only the GPU accelerators.
|
||||
//It is called with C++11 call_once, which provides thread-safety.
|
||||
void ihipInit()
|
||||
{
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER
|
||||
amdtInitializeActivityLogger();
|
||||
amdtScopedMarker("ihipInit", "HIP", NULL);
|
||||
#endif
|
||||
|
||||
|
||||
HipReadEnv();
|
||||
|
||||
|
||||
/*
|
||||
* Build a table of valid compute devices.
|
||||
|
||||
@@ -62,7 +62,7 @@ extern int HIP_COHERENT_HOST_ALLOC;
|
||||
|
||||
//---
|
||||
// Chicken bits for disabling functionality to work around potential issues:
|
||||
extern int HIP_DISABLE_HW_KERNEL_DEP;
|
||||
extern int HIP_SYNC_HOST_ALLOC;
|
||||
|
||||
|
||||
// Class to assign a short TID to each new thread, for HIP debugging purposes.
|
||||
|
||||
@@ -164,6 +164,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (sizeBytes == 0) {
|
||||
@@ -216,6 +220,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (HIP_SYNC_HOST_ALLOC) {
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
@@ -993,6 +1000,8 @@ hipError_t hipHostFree(void* ptr)
|
||||
return ihipLogStatus(hipStatus);
|
||||
};
|
||||
|
||||
|
||||
// Deprecated:
|
||||
hipError_t hipFreeHost(void* ptr)
|
||||
{
|
||||
return hipHostFree(ptr);
|
||||
|
||||
Ссылка в новой задаче
Block a user