SWDEV-295522: Fixing Performance Issue
Removing the DEBUG_TRACE calls and the unnecessary calls to roctracer_op_string lets the MS app report a stable 78 to 81 samples per second, depending on the type of trace, while the main app without rocprof reports 100 to 106. More detailed numbers will be posted in the ticket.
Change-Id: Ida25d3bfc72047afaa27326d697be76d97564334
[ROCm/roctracer commit: e7327aaac7]
Этот коммит содержится в:
@@ -294,8 +294,6 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla
|
||||
std::lock_guard<correlation_id_mutex_t> lck(correlation_id_mutex);
|
||||
const auto ret = correlation_id_map.insert({correlation_id, correlation_id_tls});
|
||||
if (ret.second == false) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")");
|
||||
|
||||
DEBUG_TRACE("CorrelationIdRegistr id(%lu) id_tls(%lu)\n", correlation_id, correlation_id_tls);
|
||||
}
|
||||
|
||||
static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) {
|
||||
@@ -304,9 +302,6 @@ static inline activity_correlation_id_t CorrelationIdLookup(const activity_corre
|
||||
if (it == correlation_id_map.end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
|
||||
const activity_correlation_id_t ret_val = it->second;
|
||||
correlation_id_map.erase(it);
|
||||
|
||||
DEBUG_TRACE("CorrelationIdLookup id(%lu) ret(%lu)\n", correlation_id, ret_val);
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
@@ -394,11 +389,6 @@ void* HIP_SyncApiDataCallback(
|
||||
// Clearing correlatin ID
|
||||
correlation_id_tls = 0;
|
||||
}
|
||||
|
||||
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
|
||||
DEBUG_TRACE("HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) time_ns(%lu)\n",
|
||||
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -488,11 +478,6 @@ void* HIP_SyncActivityCallback(
|
||||
// Clearing correlatin ID
|
||||
correlation_id_tls = 0;
|
||||
}
|
||||
|
||||
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
|
||||
DEBUG_TRACE("HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n",
|
||||
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -507,10 +492,6 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
record_ptr->correlation_id = CorrelationIdLookup(record_ptr->correlation_id);
|
||||
if (record_ptr->correlation_id == 0) return;
|
||||
pool->Write(*record_ptr);
|
||||
|
||||
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind);
|
||||
DEBUG_TRACE("HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) beg_ns(%lu) end_ns(%lu)\n",
|
||||
name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id, record_ptr->begin_ns, record_ptr->end_ns);
|
||||
}
|
||||
|
||||
// Open output file
|
||||
|
||||
@@ -82,21 +82,6 @@ THE SOFTWARE.
|
||||
// Returns the result of the raw getpid syscall, converted to uint32_t.
static inline uint32_t GetPid() {
  const long raw_pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(raw_pid);
}
|
||||
// Returns the result of the raw gettid syscall, converted to uint32_t.
static inline uint32_t GetTid() {
  const long raw_tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(raw_tid);
}
|
||||
|
||||
#if DEBUG_TRACE_ON
// Formats a printf-style message into a fixed 256-byte stack buffer
// (vsnprintf truncates anything longer), then prints it to stdout prefixed
// with "<pid>:<tid> " and flushes stdout.
inline static void DEBUG_TRACE(const char* fmt, ...) {
  constexpr int kBufferSize = 256;
  char message[kBufferSize];

  va_list args;
  va_start(args, fmt);
  vsnprintf(message, kBufferSize, fmt, args);
  va_end(args);

  printf("%u:%u %s", GetPid(), GetTid(), message);
  fflush(stdout);
}
#else
// Tracing disabled: the function body is empty. Being an ordinary function
// rather than a macro, its arguments are still evaluated at every call site.
inline static void DEBUG_TRACE(const char* fmt, ...) {}
#endif
|
||||
|
||||
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
|
||||
hsa_rt_utils::Timer* timer = NULL;
|
||||
thread_local timestamp_t hsa_begin_timestamp = 0;
|
||||
@@ -472,10 +457,6 @@ void hip_api_callback(
|
||||
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
|
||||
}
|
||||
|
||||
const char * name = roctracer_op_string(domain, cid, 0);
|
||||
DEBUG_TRACE("hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") correlation_id(%lu) timestamp(%lu)\n",
|
||||
name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id, timestamp);
|
||||
}
|
||||
|
||||
void mark_api_callback(
|
||||
@@ -518,11 +499,7 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
const char* str = (domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK");
|
||||
rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" << entry->tid;
|
||||
oss << std::dec << rec_ss.str() << " " << str;
|
||||
|
||||
const char * name = roctracer_op_string(entry->domain, entry->cid, 0);
|
||||
DEBUG_TRACE("hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) beg(%lu) end(%lu))\n",
|
||||
name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp, end_timestamp);
|
||||
|
||||
|
||||
if (domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
#if HIP_PROF_HIP_API_STRING
|
||||
if (hip_api_stats != NULL) {
|
||||
@@ -643,9 +620,6 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
DEBUG_TRACE("pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) beg(%lu) end(%lu)\n",
|
||||
name, record->domain, record->op, record->kind, record, record->correlation_id, record->begin_ns, record->end_ns);
|
||||
|
||||
switch(record->domain) {
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
if (hip_memcpy_stats != NULL) {
|
||||
|
||||
Ссылка в новой задаче
Block a user