From d2b9d267b2d366e70877f150522e9119968df17d Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Thu, 20 Aug 2020 13:33:56 -0400 Subject: [PATCH] SWDEV-248499 Fix a crash when printf is used with cooperative kernels root cause - cooperative queue is not inserted into queuePool_ (HSA queues) of the ROC device class, causing a crash when creating hostcall buffers for printf Change-Id: I3f9aceb4e5fe6a7c7a2a549a4bb0a3511fe02799 --- rocclr/device/rocm/rocdevice.cpp | 39 ++++++++++++++++++++++--------- rocclr/device/rocm/rocdevice.hpp | 3 ++- rocclr/device/rocm/rocvirtual.cpp | 2 +- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 7fc422d8fc..8b51d4080d 100755 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -142,6 +142,7 @@ Device::Device(hsa_agent_t bkendDevice) , vgpusAccess_("Virtual GPU List Ops Lock", true) , hsa_exclusive_gpu_access_(false) , queuePool_(QueuePriority::Total) + , coopHostcallBuffer_(nullptr) , numOfVgpus_(0) { group_segment_.handle = 0; system_segment_.handle = 0; @@ -233,6 +234,12 @@ Device::~Device() { } delete[] p2p_agents_list_; + + if (coopHostcallBuffer_) { + disableHostcalls(coopHostcallBuffer_); + context().svmFree(coopHostcallBuffer_); + coopHostcallBuffer_ = nullptr; + } } bool NullDevice::initCompiler(bool isOffline) { #if defined(WITH_COMPILER_LIB) @@ -2323,20 +2330,26 @@ void Device::releaseQueue(hsa_queue_t* queue) { hsa_queue_destroy(queue); } -void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue) { +void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue) { decltype(queuePool_)::value_type::iterator qIter; - for (auto& it : queuePool_) { - qIter = it.find(queue); - if (qIter != it.end()) { - break; + + if (!coop_queue) { + for (auto &it : queuePool_) { + qIter = it.find(queue); + if (qIter != it.end()) { + break; + } } - } - assert(qIter != queuePool_[QueuePriority::High].end()); + 
assert(qIter != queuePool_[QueuePriority::High].end()); - auto& qInfo = qIter->second; - if (qInfo.hostcallBuffer_) { - return qInfo.hostcallBuffer_; + if (qIter->second.hostcallBuffer_) { + return qIter->second.hostcallBuffer_; + } + } else { + if (coopHostcallBuffer_) { + return coopHostcallBuffer_; + } } // The number of packets required in each buffer is at least equal to the @@ -2355,7 +2368,11 @@ void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue) { } ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Created hostcall buffer %p for hardware queue %p", buffer, queue); - qInfo.hostcallBuffer_ = buffer; + if (!coop_queue) { + qIter->second.hostcallBuffer_ = buffer; + } else { + coopHostcallBuffer_ = buffer; + } if (!enableHostcalls(buffer, numPackets)) { ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, "Failed to register hostcall buffer %p with listener", buffer); diff --git a/rocclr/device/rocm/rocdevice.hpp b/rocclr/device/rocm/rocdevice.hpp index 237847b541..9980ba499e 100755 --- a/rocclr/device/rocm/rocdevice.hpp +++ b/rocclr/device/rocm/rocdevice.hpp @@ -465,7 +465,7 @@ class Device : public NullDevice { //! For the given HSA queue, return an existing hostcall buffer or create a //! new one. queuePool_ keeps a mapping from HSA queue to hostcall buffer. - void* getOrCreateHostcallBuffer(hsa_queue_t* queue); + void* getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue = false); //! Return multi GPU grid launch sync buffer address MGSync() const { return mg_sync_; } @@ -531,6 +531,7 @@ class Device : public NullDevice { //! 
returns a hsa queue from queuePool with least refCount and updates the refCount as well hsa_queue_t* getQueueFromPool(const uint qIndex); + void* coopHostcallBuffer_; public: amd::Atomic numOfVgpus_; //!< Virtual gpu unique index diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index b3a02aedda..bc6fb0bb31 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -2226,7 +2226,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const } case amd::KernelParameterDescriptor::HiddenHostcallBuffer: { if (amd::IS_HIP) { - auto buffer = roc_device_.getOrCreateHostcallBuffer(gpu_queue_); + auto buffer = roc_device_.getOrCreateHostcallBuffer(gpu_queue_, coopGroups); if (!buffer) { ClPrint(amd::LOG_ERROR, amd::LOG_KERN, "Kernel expects a hostcall buffer, but none found");