From 14c153601dc1b511b9dd212ad39cc723a7df316c Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Fri, 9 Sep 2022 10:04:16 -0700 Subject: [PATCH] SWDEV-355896 - Fix a data corruption error in post processing The post-processing script cannot handle HIP ops without a correlation ID. The correlation ID is needed to connect the record to a HIP stream and originating thread. This issue was exposed by a change to the tracer API to report asynchronous activities even if their originating synchronous API activity (callback) is not enabled. This was a flaw in the API. Also fix an issue with the API filtering. Undefined API names should not cause an exception, they should be ignored. Change-Id: Iab2221af6180ade2b9c2eb10c256c3a73d872e9f [ROCm/roctracer commit: 4856d339594d4e54fe55f9b2717be5794970595b] --- projects/roctracer/plugin/file/file.cpp | 6 ++++ .../roctracer/src/tracer_tool/tracer_tool.cpp | 35 +++++++++++++------ 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/projects/roctracer/plugin/file/file.cpp b/projects/roctracer/plugin/file/file.cpp index a5f014f732..c0d78b1377 100644 --- a/projects/roctracer/plugin/file/file.cpp +++ b/projects/roctracer/plugin/file/file.cpp @@ -283,6 +283,12 @@ class file_plugin_t { switch (begin->domain) { case ACTIVITY_DOMAIN_HIP_OPS: { + // The post-processing script cannot handle HIP ops without a correlation ID. The + // correlation ID is needed to connect the record to a HIP stream and originating thread. + // The script could be modified to handle ops without correlation IDs, but for backward + // compatibility, we are simply dropping the records here. 
+ if (begin->correlation_id == 0) break; + output_file = get_output_file(ACTIVITY_DOMAIN_HIP_OPS); *output_file << std::dec << begin->begin_ns << ":" << begin->end_ns << " " << begin->device_id << ":" << begin->queue_id << " " << name << ":" diff --git a/projects/roctracer/src/tracer_tool/tracer_tool.cpp b/projects/roctracer/src/tracer_tool/tracer_tool.cpp index 90eeb8f7ee..db21e2cbc9 100644 --- a/projects/roctracer/src/tracer_tool/tracer_tool.cpp +++ b/projects/roctracer/src/tracer_tool/tracer_tool.cpp @@ -701,21 +701,27 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // Enable HSA API callbacks/activity if (trace_hsa_api) { - std::cout << " HSA-trace("; + std::ostringstream out; + out << " HSA-trace("; if (hsa_api_vec.size() != 0) { + out << "-*"; for (unsigned i = 0; i < hsa_api_vec.size(); ++i) { uint32_t cid = HSA_API_ID_NUMBER; const char* api = hsa_api_vec[i].c_str(); - CHECK_ROCTRACER(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr)); - CHECK_ROCTRACER( - roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr)); - std::cout << " " << api; + if (roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr) == + ROCTRACER_STATUS_SUCCESS && + roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr) == + ROCTRACER_STATUS_SUCCESS) + out << ' ' << api; + else + warning("Unable to enable HSA_API tracing for invalid operation %s", api); } } else { CHECK_ROCTRACER( roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr)); + out << "*"; } - std::cout << std::endl; + std::cout << out.str() << ')' << std::endl; } // Enable HSA GPU activity @@ -729,30 +735,37 @@ ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // Enable HIP API callbacks/activity if (trace_hip_api || trace_hip_activity) { - std::cout << " HIP-trace()" << std::endl; + std::ostringstream out; + out << " HIP-trace("; // Allocating tracing pool 
open_tracing_pool(); // Enable tracing if (trace_hip_api) { if (hip_api_vec.size() != 0) { + out << "-*"; for (unsigned i = 0; i < hip_api_vec.size(); ++i) { uint32_t cid = HIP_API_ID_NONE; const char* api = hip_api_vec[i].c_str(); - CHECK_ROCTRACER(roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr)); - CHECK_ROCTRACER(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, - hip_api_callback, nullptr)); - std::cout << " " << api; + if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr) == + ROCTRACER_STATUS_SUCCESS && + roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback, + nullptr) == ROCTRACER_STATUS_SUCCESS) + out << ' ' << api; + else + warning("Unable to enable HIP_API tracing for invalid operation %s", api); } } else { CHECK_ROCTRACER( roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr)); + out << "*"; } } if (trace_hip_activity) { CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); } + std::cout << out.str() << ')' << std::endl; } // Enable PC sampling