SWDEV-355896 - Fix a data corruption error in post processing

The post-processing script cannot handle HIP ops without a correlation
ID. The correlation ID is needed to connect the record to a HIP stream
and originating thread.

This issue was exposed by a change to the tracer API to report
asynchronous activities even if their originating synchronous API
activity (callback) is not enabled. This was a flaw in the API.

Also fix an issue with the API filtering. Undefined API names should
not cause an exception; they should be ignored.

Change-Id: Iab2221af6180ade2b9c2eb10c256c3a73d872e9f


[ROCm/roctracer commit: 4856d33959]
This commit is contained in:
Laurent Morichetti
2022-09-09 10:04:16 -07:00
rodzic 1d0f27e2c8
commit 14c153601d
2 zmienionych plików z 30 dodań i 11 usunięć
@@ -283,6 +283,12 @@ class file_plugin_t {
switch (begin->domain) {
case ACTIVITY_DOMAIN_HIP_OPS: {
// The post-processing script cannot handle HIP ops without a correlation ID. The
// correlation ID is needed to connect the record to a HIP stream and originating thread.
// The script could be modified to handle ops without correlation IDs, but for backward
// compatibility, we are simply dropping the records here.
if (begin->correlation_id == 0) break;
output_file = get_output_file(ACTIVITY_DOMAIN_HIP_OPS);
*output_file << std::dec << begin->begin_ns << ":" << begin->end_ns << " "
<< begin->device_id << ":" << begin->queue_id << " " << name << ":"
@@ -701,21 +701,27 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
// Enable HSA API callbacks/activity
if (trace_hsa_api) {
std::cout << " HSA-trace(";
std::ostringstream out;
out << " HSA-trace(";
if (hsa_api_vec.size() != 0) {
out << "-*";
for (unsigned i = 0; i < hsa_api_vec.size(); ++i) {
uint32_t cid = HSA_API_ID_NUMBER;
const char* api = hsa_api_vec[i].c_str();
CHECK_ROCTRACER(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr));
CHECK_ROCTRACER(
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr));
std::cout << " " << api;
if (roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr) ==
ROCTRACER_STATUS_SUCCESS &&
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr) ==
ROCTRACER_STATUS_SUCCESS)
out << ' ' << api;
else
warning("Unable to enable HSA_API tracing for invalid operation %s", api);
}
} else {
CHECK_ROCTRACER(
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
out << "*";
}
std::cout << std::endl;
std::cout << out.str() << ')' << std::endl;
}
// Enable HSA GPU activity
@@ -729,30 +735,37 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
// Enable HIP API callbacks/activity
if (trace_hip_api || trace_hip_activity) {
std::cout << " HIP-trace()" << std::endl;
std::ostringstream out;
out << " HIP-trace(";
// Allocating tracing pool
open_tracing_pool();
// Enable tracing
if (trace_hip_api) {
if (hip_api_vec.size() != 0) {
out << "-*";
for (unsigned i = 0; i < hip_api_vec.size(); ++i) {
uint32_t cid = HIP_API_ID_NONE;
const char* api = hip_api_vec[i].c_str();
CHECK_ROCTRACER(roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr));
CHECK_ROCTRACER(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid,
hip_api_callback, nullptr));
std::cout << " " << api;
if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr) ==
ROCTRACER_STATUS_SUCCESS &&
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback,
nullptr) == ROCTRACER_STATUS_SUCCESS)
out << ' ' << api;
else
warning("Unable to enable HIP_API tracing for invalid operation %s", api);
}
} else {
CHECK_ROCTRACER(
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
out << "*";
}
}
if (trace_hip_activity) {
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
}
std::cout << out.str() << ')' << std::endl;
}
// Enable PC sampling