SWDEV-295522: Fixing Performance Issue

Removing the DEBUG_TRACE calls and the unnecessary calls to roctracer_op_string made the MS app report 78 to 81 stable samples per second, depending on the type of trace, while the main app without rocprof reports 100 to 106. More detailed numbers will be posted in the ticket.

Change-Id: Ida25d3bfc72047afaa27326d697be76d97564334
This commit is contained in:
Ammar ELWazir
2022-04-07 00:07:24 +00:00
parent 7ee4f87b73
commit e7327aaac7
2 changed files with 1 additions and 46 deletions
-19
View File
@@ -294,8 +294,6 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla
std::lock_guard<correlation_id_mutex_t> lck(correlation_id_mutex);
const auto ret = correlation_id_map.insert({correlation_id, correlation_id_tls});
if (ret.second == false) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")");
DEBUG_TRACE("CorrelationIdRegistr id(%lu) id_tls(%lu)\n", correlation_id, correlation_id_tls);
}
static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) {
@@ -304,9 +302,6 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre
if (it == correlation_id_map.end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
const activity_correlation_id_t ret_val = it->second;
correlation_id_map.erase(it);
DEBUG_TRACE("CorrelationIdLookup id(%lu) ret(%lu)\n", correlation_id, ret_val);
return ret_val;
}
@@ -394,11 +389,6 @@ void* HIP_SyncApiDataCallback(
// Clearing correlation ID
correlation_id_tls = 0;
}
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
DEBUG_TRACE("HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) time_ns(%lu)\n",
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns());
return ret;
}
@@ -488,11 +478,6 @@ void* HIP_SyncActivityCallback(
// Clearing correlation ID
correlation_id_tls = 0;
}
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
DEBUG_TRACE("HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n",
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns);
return ret;
}
@@ -507,10 +492,6 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
record_ptr->correlation_id = CorrelationIdLookup(record_ptr->correlation_id);
if (record_ptr->correlation_id == 0) return;
pool->Write(*record_ptr);
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind);
DEBUG_TRACE("HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) beg_ns(%lu) end_ns(%lu)\n",
name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id, record_ptr->begin_ns, record_ptr->end_ns);
}
// Open output file
+1 -27
View File
@@ -82,21 +82,6 @@ THE SOFTWARE.
// Returns the result of the raw getpid system call, narrowed to 32 bits.
static inline uint32_t GetPid() {
  const auto raw_pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(raw_pid);
}
// Returns the result of the raw gettid system call, narrowed to 32 bits.
static inline uint32_t GetTid() {
  const auto raw_tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(raw_tid);
}
#if DEBUG_TRACE_ON
// printf-style trace helper, active when DEBUG_TRACE_ON is non-zero.
// The message is formatted into a fixed 256-byte stack buffer (vsnprintf
// truncates anything longer), written to stdout prefixed with "<pid>:<tid> ",
// and stdout is flushed after every call.
inline static void DEBUG_TRACE(const char* fmt, ...) {
  constexpr int kMessageCapacity = 256;
  char message[kMessageCapacity];

  va_list args;
  va_start(args, fmt);
  vsnprintf(message, kMessageCapacity, fmt, args);
  printf("%u:%u %s", GetPid(), GetTid(), message);
  fflush(stdout);
  va_end(args);
}
#else
// Tracing disabled: the call accepts the same arguments and does nothing.
inline static void DEBUG_TRACE(const char* fmt, ...) {
}
#endif
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
hsa_rt_utils::Timer* timer = NULL;
thread_local timestamp_t hsa_begin_timestamp = 0;
@@ -472,10 +457,6 @@ void hip_api_callback(
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
}
const char * name = roctracer_op_string(domain, cid, 0);
DEBUG_TRACE("hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") correlation_id(%lu) timestamp(%lu)\n",
name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id, timestamp);
}
void mark_api_callback(
@@ -518,11 +499,7 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
const char* str = (domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK");
rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" << entry->tid;
oss << std::dec << rec_ss.str() << " " << str;
const char * name = roctracer_op_string(entry->domain, entry->cid, 0);
DEBUG_TRACE("hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) beg(%lu) end(%lu))\n",
name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp, end_timestamp);
if (domain == ACTIVITY_DOMAIN_HIP_API) {
#if HIP_PROF_HIP_API_STRING
if (hip_api_stats != NULL) {
@@ -643,9 +620,6 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
while (record < end_record) {
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
DEBUG_TRACE("pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) beg(%lu) end(%lu)\n",
name, record->domain, record->op, record->kind, record, record->correlation_id, record->begin_ns, record->end_ns);
switch(record->domain) {
case ACTIVITY_DOMAIN_HCC_OPS:
if (hip_memcpy_stats != NULL) {