diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h
index 71b1d3fa44..18ef156c39 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h
@@ -400,6 +400,9 @@ class GpuAgent : public GpuAgentInt {
   // List of agents connected via xGMI
   std::vector xgmi_peer_list_;
 
+  // Guards xgmi_peer_list_; GetXgmiBlit() acquires it before scanning the list.
+  KernelMutex xgmi_peer_list_lock_;
+
   // @brief AQL queues for cache management and blit compute usage.
   enum QueueEnum {
     QueueUtility,  // Cache management and device to {host,device} blit compute
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
index da7932d951..c19b91fdb4 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp
@@ -1483,6 +1483,8 @@ lazy_ptr& GpuAgent::GetXgmiBlit(const core::Agent& dst_agent) {
   uint32_t xgmi_engine_cnt = properties_.NumSdmaXgmiEngines;
   assert((xgmi_engine_cnt > 0) && ("Illegal condition, should not happen"));
 
+  ScopedAcquire lock(&xgmi_peer_list_lock_);  // taken before xgmi_peer_list_ is read below
+
   for (uint32_t idx = 0; idx < xgmi_peer_list_.size(); idx++) {
     uint64_t dst_handle = dst_agent.public_handle().handle;
     uint64_t peer_handle = xgmi_peer_list_[idx]->public_handle().handle;