Add ns-level timer for HIP API routines
Refactor some misuses of ihipLogStatus; these should only be in top-level
HIP APIs and should be paired with HIP_INIT_API calls.
[ROCm/clr commit: dc7d993a02]
Этот коммит содержится в:
@@ -60,12 +60,12 @@ hipError_t ihipGetDeviceCount(int *count)
|
||||
*count = g_deviceCnt;
|
||||
|
||||
if (*count > 0) {
|
||||
e = ihipLogStatus(hipSuccess);
|
||||
e = hipSuccess;
|
||||
} else {
|
||||
e = ihipLogStatus(hipErrorNoDevice);
|
||||
e = hipErrorNoDevice;
|
||||
}
|
||||
} else {
|
||||
e = ihipLogStatus(hipErrorInvalidValue);
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
@@ -73,7 +73,7 @@ hipError_t ihipGetDeviceCount(int *count)
|
||||
hipError_t hipGetDeviceCount(int *count)
|
||||
{
|
||||
HIP_INIT_API(count);
|
||||
return ihipGetDeviceCount(count);
|
||||
return ihipLogStatus(ihipGetDeviceCount(count));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
|
||||
@@ -205,7 +205,7 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if(pi == nullptr) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
auto * hipDevice = ihipGetDevice(device);
|
||||
|
||||
@@ -156,7 +156,7 @@ thread_local TidInfo tls_tidInfo;
|
||||
//=================================================================================================
|
||||
// Top-level "free" functions:
|
||||
//=================================================================================================
|
||||
void recordApiTrace(std::string *fullStr, const std::string &apiStr)
|
||||
uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr)
|
||||
{
|
||||
auto apiSeqNum = tls_tidInfo.apiSeqNum();
|
||||
auto tid = tls_tidInfo.tid();
|
||||
@@ -178,10 +178,14 @@ void recordApiTrace(std::string *fullStr, const std::string &apiStr)
|
||||
*fullStr += " ";
|
||||
*fullStr += apiStr;
|
||||
|
||||
uint64_t apiStartTick = getTicks();
|
||||
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
fprintf (stderr, "%s<<hip-api tid:%s%s\n" , API_COLOR, fullStr->c_str(), API_COLOR_END);
|
||||
fprintf (stderr, "%s<<hip-api tid:%s @%lu%s\n" , API_COLOR, fullStr->c_str(), apiStartTick, API_COLOR_END);
|
||||
}
|
||||
|
||||
return apiStartTick;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -244,23 +244,28 @@ static const DbName dbName [] =
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static inline uint64_t getTicks()
|
||||
{
|
||||
return hc::get_system_ticks();
|
||||
}
|
||||
|
||||
//---
|
||||
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
extern uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
|
||||
#define API_TRACE(forceTrace, ...)\
|
||||
uint64_t hipApiStartTick;\
|
||||
{\
|
||||
tls_tidInfo.incApiSeqNum();\
|
||||
if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
|
||||
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
|
||||
std::string fullStr;\
|
||||
recordApiTrace(&fullStr, apiStr);\
|
||||
hipApiStartTick = recordApiTrace(&fullStr, apiStr);\
|
||||
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
|
||||
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
|
||||
}\
|
||||
}
|
||||
|
||||
#else
|
||||
// Swallow API_TRACE
|
||||
#define API_TRACE(IS_CMD, ...)\
|
||||
@@ -302,7 +307,10 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
tls_lastHipError = localHipStatus;\
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
|
||||
auto ticks = getTicks() - hipApiStartTick;\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
|
||||
(localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), \
|
||||
__func__, localHipStatus, ihipErrorString(localHipStatus), ticks, API_COLOR_END);\
|
||||
}\
|
||||
if (HIP_PROFILE_API) { MARKER_END(); }\
|
||||
localHipStatus;\
|
||||
|
||||
@@ -374,6 +374,8 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t hmod)
|
||||
{
|
||||
HIP_INIT_API(hmod);
|
||||
|
||||
// TODO - improve this synchronization so it is thread-safe.
|
||||
// Currently we want for all inflight activity to complete, but don't prevent another
|
||||
// thread from launching new kernels before we finish this operation.
|
||||
@@ -408,7 +410,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char
|
||||
hipError_t ret = hipSuccess;
|
||||
|
||||
if (name == nullptr){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return (hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
if (ctx == nullptr){
|
||||
@@ -431,7 +433,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char
|
||||
hsa_executable_symbol_t symbol;
|
||||
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &symbol);
|
||||
if(status != HSA_STATUS_SUCCESS){
|
||||
return ihipLogStatus(hipErrorNotFound);
|
||||
return hipErrorNotFound;
|
||||
}
|
||||
|
||||
status = hsa_executable_symbol_get_info(symbol,
|
||||
@@ -503,11 +505,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){
|
||||
kernArgSize = *(size_t*)(config[3]);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
return hipErrorNotInitialized;
|
||||
}
|
||||
|
||||
}else{
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
|
||||
@@ -624,7 +626,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
}
|
||||
else{
|
||||
hipFunction_t func;
|
||||
ihipModuleGetSymbol(&func, hmod, name);
|
||||
ret = ihipModuleGetSymbol(&func, hmod, name);
|
||||
*bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t);
|
||||
*dptr = reinterpret_cast<void*>(func->_object);
|
||||
return ihipLogStatus(ret);
|
||||
|
||||
@@ -140,7 +140,6 @@ hipError_t hipStreamQuery(hipStream_t stream)
|
||||
//---
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(stream);
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
Ссылка в новой задаче
Block a user