Add ns-level timer for HIP API routines

Refactor some misuses of ihipLogStatus; it should only be used in top-level
HIP APIs and should be paired with HIP_INIT_API calls.


[ROCm/clr commit: dc7d993a02]
Этот коммит содержится в:
Ben Sander
2017-10-30 20:18:41 +00:00
родитель fd5b9cb650
Коммит 4df6b371ab
5 изменённых файлов: 30 добавлений и 17 удалений
+5 -5
Просмотреть файл
@@ -60,12 +60,12 @@ hipError_t ihipGetDeviceCount(int *count)
*count = g_deviceCnt;
if (*count > 0) {
e = ihipLogStatus(hipSuccess);
e = hipSuccess;
} else {
e = ihipLogStatus(hipErrorNoDevice);
e = hipErrorNoDevice;
}
} else {
e = ihipLogStatus(hipErrorInvalidValue);
e = hipErrorInvalidValue;
}
return e;
}
@@ -73,7 +73,7 @@ hipError_t ihipGetDeviceCount(int *count)
hipError_t hipGetDeviceCount(int *count)
{
HIP_INIT_API(count);
return ihipGetDeviceCount(count);
return ihipLogStatus(ihipGetDeviceCount(count));
}
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
@@ -205,7 +205,7 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device
hipError_t e = hipSuccess;
if(pi == nullptr) {
return ihipLogStatus(hipErrorInvalidValue);
return hipErrorInvalidValue;
}
auto * hipDevice = ihipGetDevice(device);
+6 -2
Просмотреть файл
@@ -156,7 +156,7 @@ thread_local TidInfo tls_tidInfo;
//=================================================================================================
// Top-level "free" functions:
//=================================================================================================
void recordApiTrace(std::string *fullStr, const std::string &apiStr)
uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr)
{
auto apiSeqNum = tls_tidInfo.apiSeqNum();
auto tid = tls_tidInfo.tid();
@@ -178,10 +178,14 @@ void recordApiTrace(std::string *fullStr, const std::string &apiStr)
*fullStr += " ";
*fullStr += apiStr;
uint64_t apiStartTick = getTicks();
if (COMPILE_HIP_DB && HIP_TRACE_API) {
fprintf (stderr, "%s<<hip-api tid:%s%s\n" , API_COLOR, fullStr->c_str(), API_COLOR_END);
fprintf (stderr, "%s<<hip-api tid:%s @%lu%s\n" , API_COLOR, fullStr->c_str(), apiStartTick, API_COLOR_END);
}
return apiStartTick;
}
+12 -4
Просмотреть файл
@@ -244,23 +244,28 @@ static const DbName dbName [] =
#endif
// Returns the current value reported by hc::get_system_ticks().
// NOTE(review): callers print differences of this value labelled "ns";
// confirm the tick unit really is nanoseconds on all supported platforms.
static inline uint64_t getTicks()
{
return hc::get_system_ticks();
}
//---
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
extern uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr);
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
#define API_TRACE(forceTrace, ...)\
uint64_t hipApiStartTick;\
{\
tls_tidInfo.incApiSeqNum();\
if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
std::string fullStr;\
recordApiTrace(&fullStr, apiStr);\
hipApiStartTick = recordApiTrace(&fullStr, apiStr);\
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
}\
}
#else
// Swallow API_TRACE
#define API_TRACE(IS_CMD, ...)\
@@ -302,7 +307,10 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
tls_lastHipError = localHipStatus;\
\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
auto ticks = getTicks() - hipApiStartTick;\
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
(localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), \
__func__, localHipStatus, ihipErrorString(localHipStatus), ticks, API_COLOR_END);\
}\
if (HIP_PROFILE_API) { MARKER_END(); }\
localHipStatus;\
+7 -5
Просмотреть файл
@@ -374,6 +374,8 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
hipError_t hipModuleUnload(hipModule_t hmod)
{
HIP_INIT_API(hmod);
// TODO - improve this synchronization so it is thread-safe.
// Currently we wait for all inflight activity to complete, but don't prevent another
// thread from launching new kernels before we finish this operation.
@@ -408,7 +410,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char
hipError_t ret = hipSuccess;
if (name == nullptr){
return ihipLogStatus(hipErrorInvalidValue);
return (hipErrorInvalidValue);
}
if (ctx == nullptr){
@@ -431,7 +433,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char
hsa_executable_symbol_t symbol;
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &symbol);
if(status != HSA_STATUS_SUCCESS){
return ihipLogStatus(hipErrorNotFound);
return hipErrorNotFound;
}
status = hsa_executable_symbol_get_info(symbol,
@@ -503,11 +505,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){
kernArgSize = *(size_t*)(config[3]);
} else {
return ihipLogStatus(hipErrorNotInitialized);
return hipErrorNotInitialized;
}
}else{
return ihipLogStatus(hipErrorInvalidValue);
return hipErrorInvalidValue;
}
@@ -624,7 +626,7 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
}
else{
hipFunction_t func;
ihipModuleGetSymbol(&func, hmod, name);
ret = ihipModuleGetSymbol(&func, hmod, name);
*bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t);
*dptr = reinterpret_cast<void*>(func->_object);
return ihipLogStatus(ret);
-1
Просмотреть файл
@@ -140,7 +140,6 @@ hipError_t hipStreamQuery(hipStream_t stream)
//---
hipError_t hipStreamSynchronize(hipStream_t stream)
{
HIP_INIT_API(stream);
HIP_INIT_SPECIAL_API(TRACE_SYNC, stream);
hipError_t e = hipSuccess;