diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 49eaf39bf4..0c5a901ea7 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -1609,8 +1609,10 @@ lazy_ptr& GpuAgent::GetBlitObject(const core::Agent& dst_agent, (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) && ("Both devices are CPU agents which is not expected")); - // Determine if Src and Dst devices are same - if ((src_agent.public_handle().handle) == (dst_agent.public_handle().handle)) { + // Determine if Src and Dst devices are same and are the copying device + // Such a copy is in the device local memory, which can only be saturated by a blit kernel. + if ((src_agent.public_handle().handle) == (dst_agent.public_handle().handle) && + (dst_agent.public_handle().handle == public_handle_.handle)) { // If the copy is very small then cache flush overheads can dominate. // Choose a (potentially) SDMA enabled engine to avoid cache flushing. if (size < core::Runtime::runtime_singleton_->flag().force_sdma_size()) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index fba4bd7853..374aabb25a 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -487,20 +487,15 @@ hsa_status_t Runtime::CopyMemory(void* dst, core::Agent* dst_agent, const void* return block.agentOwner; }; + const bool dst_gpu = (dst_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice); + const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice); + core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent; + // Lookup owning agent if blit kernel is selected or if flag override is set. if ((dst_agent == src_agent) || flag().discover_copy_agents()) { dst_agent = lookupAgent(dst_agent, dst); src_agent = lookupAgent(src_agent, src); } - if (dst_agent == nullptr || src_agent == nullptr) return HSA_STATUS_ERROR_INVALID_AGENT; - - // At least one agent must be available for operation in the current process. - if (!dst_agent->Enabled() && !src_agent->Enabled()) return HSA_STATUS_ERROR_INVALID_AGENT; - - const bool dst_gpu = (dst_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice); - const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice); - core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent; - if (!copy_agent->Enabled()) copy_agent = (copy_agent == src_agent) ? dst_agent : src_agent; return copy_agent->DmaCopy(dst, *dst_agent, src, *src_agent, size, dep_signals, completion_signal); }