SWDEV-248499 Fix a crash when printf is used with cooperative kernels

Root cause: the cooperative queue is not inserted into queuePool_ (HSA queues) of the ROC device class, causing a crash when creating hostcall buffers for printf.

Change-Id: I3f9aceb4e5fe6a7c7a2a549a4bb0a3511fe02799
此提交包含在:
Aryan Salmanpour
2020-08-20 13:33:56 -04:00
提交者 Aryan Salmanpour
父節點 9110b09227
當前提交 d2b9d267b2
共有 3 個檔案被更改,包括 31 行新增13 行删除
+28 -11
查看文件
@@ -142,6 +142,7 @@ Device::Device(hsa_agent_t bkendDevice)
, vgpusAccess_("Virtual GPU List Ops Lock", true)
, hsa_exclusive_gpu_access_(false)
, queuePool_(QueuePriority::Total)
, coopHostcallBuffer_(nullptr)
, numOfVgpus_(0) {
group_segment_.handle = 0;
system_segment_.handle = 0;
@@ -233,6 +234,12 @@ Device::~Device() {
}
delete[] p2p_agents_list_;
if (coopHostcallBuffer_) {
disableHostcalls(coopHostcallBuffer_);
context().svmFree(coopHostcallBuffer_);
coopHostcallBuffer_ = nullptr;
}
}
bool NullDevice::initCompiler(bool isOffline) {
#if defined(WITH_COMPILER_LIB)
@@ -2323,20 +2330,26 @@ void Device::releaseQueue(hsa_queue_t* queue) {
hsa_queue_destroy(queue);
}
void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue) {
void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue) {
decltype(queuePool_)::value_type::iterator qIter;
for (auto& it : queuePool_) {
qIter = it.find(queue);
if (qIter != it.end()) {
break;
if (!coop_queue) {
for (auto &it : queuePool_) {
qIter = it.find(queue);
if (qIter != it.end()) {
break;
}
}
}
assert(qIter != queuePool_[QueuePriority::High].end());
assert(qIter != queuePool_[QueuePriority::High].end());
auto& qInfo = qIter->second;
if (qInfo.hostcallBuffer_) {
return qInfo.hostcallBuffer_;
if (qIter->second.hostcallBuffer_) {
return qIter->second.hostcallBuffer_;
}
} else {
if (coopHostcallBuffer_) {
return coopHostcallBuffer_;
}
}
// The number of packets required in each buffer is at least equal to the
@@ -2355,7 +2368,11 @@ void* Device::getOrCreateHostcallBuffer(hsa_queue_t* queue) {
}
ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Created hostcall buffer %p for hardware queue %p", buffer,
queue);
qInfo.hostcallBuffer_ = buffer;
if (!coop_queue) {
qIter->second.hostcallBuffer_ = buffer;
} else {
coopHostcallBuffer_ = buffer;
}
if (!enableHostcalls(buffer, numPackets)) {
ClPrint(amd::LOG_ERROR, amd::LOG_QUEUE, "Failed to register hostcall buffer %p with listener",
buffer);
+2 -1
查看文件
@@ -465,7 +465,7 @@ class Device : public NullDevice {
//! For the given HSA queue, return an existing hostcall buffer or create a
//! new one. queuePool_ keeps a mapping from HSA queue to hostcall buffer.
void* getOrCreateHostcallBuffer(hsa_queue_t* queue);
void* getOrCreateHostcallBuffer(hsa_queue_t* queue, bool coop_queue = false);
//! Return multi GPU grid launch sync buffer
address MGSync() const { return mg_sync_; }
@@ -531,6 +531,7 @@ class Device : public NullDevice {
//! returns a hsa queue from queuePool with least refCount and updates the refCount as well
hsa_queue_t* getQueueFromPool(const uint qIndex);
void* coopHostcallBuffer_;
public:
amd::Atomic<uint> numOfVgpus_; //!< Virtual gpu unique index
+1 -1
查看文件
@@ -2226,7 +2226,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
}
case amd::KernelParameterDescriptor::HiddenHostcallBuffer: {
if (amd::IS_HIP) {
auto buffer = roc_device_.getOrCreateHostcallBuffer(gpu_queue_);
auto buffer = roc_device_.getOrCreateHostcallBuffer(gpu_queue_, coopGroups);
if (!buffer) {
ClPrint(amd::LOG_ERROR, amd::LOG_KERN,
"Kernel expects a hostcall buffer, but none found");