diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 5fd678784c..df93bf27d3 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -382,20 +382,10 @@ hsa_status_t hsa_amd_profiling_get_async_copy_time( core::Agent* agent = signal->async_copy_agent(); - if (agent == NULL) { + if (agent == nullptr) { return HSA_STATUS_ERROR; } - // Validate the embedded agent pointer if the signal is IPC. - if (signal->isIPC()) { - for (auto it : core::Runtime::runtime_singleton_->gpu_agents()) { - if (it == agent) break; - } - // If the agent isn't a GPU then it is from a different process or it's a CPU. - // Assume it's a CPU and illegal uses will generate garbage data same as kernel completion. - agent = core::Runtime::runtime_singleton_->cpu_agents()[0]; - } - if (agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) { // Translate timestamp from GPU to system domain. static_cast(agent)->TranslateTime(signal, *time); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 488c440830..5940a51af1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -471,6 +471,7 @@ hsa_status_t Runtime::CopyMemory(void* dst, core::Agent& dst_agent, // For cpu to cpu, fire and forget a copy thread. const bool profiling_enabled = (dst_agent.profiling_enabled() || src_agent.profiling_enabled()); + if (profiling_enabled) completion_signal.async_copy_agent(&dst_agent); std::thread( [](void* dst, const void* src, size_t size, std::vector dep_signals,