SWDEV-295522: Fixing Performance Issue

Removing the DEBUG_TRACE calls and the unnecessary calls to roctracer_op_string made the MS app report a stable 78 to 81 samples per second, depending on the type of trace, while the main app without rocprof reports 100 to 106. More detailed numbers will be posted in the ticket.

Change-Id: Ida25d3bfc72047afaa27326d697be76d97564334


[ROCm/roctracer commit: e7327aaac7]
Этот коммит содержится в:
Ammar ELWazir
2022-04-07 00:07:24 +00:00
родитель f1de3ae0e5
Коммит 35ea06abd0
2 изменённых файлов: 1 добавлений и 46 удалений
-19
Просмотреть файл
@@ -294,8 +294,6 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla
std::lock_guard<correlation_id_mutex_t> lck(correlation_id_mutex);
const auto ret = correlation_id_map.insert({correlation_id, correlation_id_tls});
if (ret.second == false) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")");
DEBUG_TRACE("CorrelationIdRegistr id(%lu) id_tls(%lu)\n", correlation_id, correlation_id_tls);
}
static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) {
@@ -304,9 +302,6 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre
if (it == correlation_id_map.end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
const activity_correlation_id_t ret_val = it->second;
correlation_id_map.erase(it);
DEBUG_TRACE("CorrelationIdLookup id(%lu) ret(%lu)\n", correlation_id, ret_val);
return ret_val;
}
@@ -394,11 +389,6 @@ void* HIP_SyncApiDataCallback(
// Clearing correlation ID
correlation_id_tls = 0;
}
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
DEBUG_TRACE("HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) time_ns(%lu)\n",
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns());
return ret;
}
@@ -488,11 +478,6 @@ void* HIP_SyncActivityCallback(
// Clearing correlation ID
correlation_id_tls = 0;
}
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
DEBUG_TRACE("HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n",
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns);
return ret;
}
@@ -507,10 +492,6 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
record_ptr->correlation_id = CorrelationIdLookup(record_ptr->correlation_id);
if (record_ptr->correlation_id == 0) return;
pool->Write(*record_ptr);
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind);
DEBUG_TRACE("HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) beg_ns(%lu) end_ns(%lu)\n",
name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id, record_ptr->begin_ns, record_ptr->end_ns);
}
// Open output file
+1 -27
Просмотреть файл
@@ -82,21 +82,6 @@ THE SOFTWARE.
// Returns the ID of the calling process, obtained by issuing the getpid
// system call directly; the result is narrowed to uint32_t.
static inline uint32_t GetPid() {
  const long raw_pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(raw_pid);
}
// Returns the ID of the calling thread, obtained by issuing the gettid
// system call directly; the result is narrowed to uint32_t.
static inline uint32_t GetTid() {
  const long raw_tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(raw_tid);
}
#if DEBUG_TRACE_ON
// Debug tracing enabled: renders the printf-style message into a fixed
// 256-byte stack buffer (vsnprintf truncates anything longer), prints it to
// stdout prefixed with "<pid>:<tid> ", and flushes stdout right away.
inline static void DEBUG_TRACE(const char* fmt, ...) {
  constexpr int kMsgCapacity = 256;
  char msg[kMsgCapacity];

  va_list args;
  va_start(args, fmt);
  vsnprintf(msg, kMsgCapacity, fmt, args);
  va_end(args);

  printf("%u:%u %s", GetPid(), GetTid(), msg);
  fflush(stdout);
}
#else
// Debug tracing disabled: the body is empty. This is a function, not a macro,
// so the argument expressions written at each call site remain part of the
// program.
inline static void DEBUG_TRACE(const char* fmt, ...) {}
#endif
// Shorthand for the timestamp type declared by hsa_rt_utils::Timer.
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
// Global pointer to the timer object; null at startup.
// NOTE(review): presumably assigned during tool initialization — the assignment is not visible here.
hsa_rt_utils::Timer* timer = NULL;
// Per-thread timestamp, zero-initialized.
// NOTE(review): presumably the begin time of the HSA call in flight on this thread — confirm against its users.
thread_local timestamp_t hsa_begin_timestamp = 0;
@@ -472,10 +457,6 @@ void hip_api_callback(
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
}
const char * name = roctracer_op_string(domain, cid, 0);
DEBUG_TRACE("hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") correlation_id(%lu) timestamp(%lu)\n",
name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id, timestamp);
}
void mark_api_callback(
@@ -518,11 +499,7 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
const char* str = (domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK");
rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" << entry->tid;
oss << std::dec << rec_ss.str() << " " << str;
const char * name = roctracer_op_string(entry->domain, entry->cid, 0);
DEBUG_TRACE("hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) beg(%lu) end(%lu))\n",
name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp, end_timestamp);
if (domain == ACTIVITY_DOMAIN_HIP_API) {
#if HIP_PROF_HIP_API_STRING
if (hip_api_stats != NULL) {
@@ -643,9 +620,6 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
while (record < end_record) {
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
DEBUG_TRACE("pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) beg(%lu) end(%lu)\n",
name, record->domain, record->op, record->kind, record, record->correlation_id, record->begin_ns, record->end_ns);
switch(record->domain) {
case ACTIVITY_DOMAIN_HCC_OPS:
if (hip_memcpy_stats != NULL) {