From cae4ed00568e05f5fd6783d7533bfccd3ac09d17 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 2 Aug 2022 14:18:43 -0400 Subject: [PATCH] Fix GPU destruction when user disabled. GPUs excluded by RVD are not expected to have scratch, memory, trap handling or memory regions set up. Now that these GPUs are added to a new list, early return on agent destruction to prevent bad function calls on destroy. Also fix up broken memory releases between the gpu lists and ugly braces. Change-Id: I52fc6e86ceba0a0383cedc63310eb409515eaf9f [ROCm/ROCR-Runtime commit: 9d2fe1ac2a3d5663e3a853630bbdba460d305710] --- .../runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp | 2 ++ .../runtime/hsa-runtime/core/runtime/runtime.cpp | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 0c5a901ea7..db376d65b3 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -198,6 +198,8 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna } GpuAgent::~GpuAgent() { + if (!(this)->Enabled()) return; + for (auto& blit : blits_) { if (!blit.empty()) { hsa_status_t status = blit->Destroy(*this); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 374aabb25a..bd6a2d8e6c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -204,12 +204,13 @@ void Runtime::RegisterAgent(Agent* agent, bool Enabled) { if (Enabled) { gpu_agents_.push_back(agent); gpu_ids_.push_back(agent->node_id()); - agents_by_gpuid_[((AMD::GpuAgent*)agent)->KfdGpuID()] = agent; + 
agents_by_gpuid_[((AMD::GpuAgent*)agent)->KfdGpuID()] = agent; // Assign the first discovered gpu agent as region gpu. if (region_gpu_ == NULL) region_gpu_ = agent; - } else + } else { disabled_gpu_agents_.push_back(agent); + } } } @@ -220,7 +221,7 @@ void Runtime::DestroyAgents() { gpu_agents_.clear(); std::for_each(disabled_gpu_agents_.begin(), disabled_gpu_agents_.end(), DeleteObject()); - gpu_agents_.clear(); + disabled_gpu_agents_.clear(); gpu_ids_.clear(); @@ -1397,6 +1398,9 @@ void Runtime::Unload() { std::for_each(gpu_agents_.begin(), gpu_agents_.end(), DeleteObject()); gpu_agents_.clear(); + std::for_each(disabled_gpu_agents_.begin(), disabled_gpu_agents_.end(), DeleteObject()); + disabled_gpu_agents_.clear(); + async_events_control_.Shutdown(); if (vm_fault_signal_ != nullptr) {