From f887f2fc6f0f2c3373986b34b6d60de9eb861e5f Mon Sep 17 00:00:00 2001 From: Rakesh Roy Date: Tue, 27 Jun 2023 20:57:49 +0530 Subject: [PATCH] SWDEV-405329 - Fix cuMask issue for WGP mode - Enable CUs adjacent pairwise for WGP mode - In HostQueue::terminate() do not segfault if virtual device hasn't been created Change-Id: I94402ff333308af5824878086cc238b3993d534d [ROCm/clr commit: 8c1232124e7e72c488521cb21b04a8bd6428145a] --- projects/clr/rocclr/device/rocm/rocdevice.cpp | 27 ++++++++++++++++++- projects/clr/rocclr/platform/commandqueue.cpp | 12 +++++---- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 70fd81f92e..e7c2b346e2 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -2829,7 +2829,32 @@ hsa_queue_t* Device::acquireQueue(uint32_t queue_size_hint, bool coop_queue, ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Setting CU mask 0x%s for hardware queue %p", ss.str().c_str(), queue); - hsa_status_t status = hsa_amd_queue_cu_set_mask(queue, mask.size() * 32, mask.data()); + std::vector final_mask = {}; + // hsa_amd_queue_cu_set_mask expects each bit in cuMask to represent each CU + // For wgp mode: Each wgp consists of 2 CUs and CUs must be adjacent pairwise enabled + // Convert each bit in the cuMask from wgp to cu by duplicating it + if (settings().enableWgpMode_) { + final_mask.resize(mask.size() * 2, 0); + + for (int i = 0; i < mask.size(); i++) { + for (int j = 0; j < 16; j++) { + // Convert least significant 16 bits + if (((mask[i] >> j) & 0x1) == 0x1) { + final_mask[2 * i] |= (0x3 << (2 * j)); + } + + // Convert most significant 16 bits + if (((mask[i] >> (16 + j)) & 0x1) == 0x1) { + final_mask[2 * i + 1] |= (0x3 << (2 * j)); + } + } + } + } else { + final_mask = mask; + } + + hsa_status_t status = hsa_amd_queue_cu_set_mask(queue, + final_mask.size() * 32, final_mask.data()); if (status != HSA_STATUS_SUCCESS) { DevLogError("Device::acquireQueue: hsa_amd_queue_cu_set_mask failed!"); hsa_queue_destroy(queue); diff --git a/projects/clr/rocclr/platform/commandqueue.cpp b/projects/clr/rocclr/platform/commandqueue.cpp index ce8ba4ae37..7bf95a2fc6 100644 --- a/projects/clr/rocclr/platform/commandqueue.cpp +++ b/projects/clr/rocclr/platform/commandqueue.cpp @@ -60,11 +60,13 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti bool HostQueue::terminate() { if (AMD_DIRECT_DISPATCH) { - Command* marker = new Marker(*this, true); - if (marker != nullptr) { - marker->enqueue(); - marker->awaitCompletion(); - marker->release(); + if (vdev() != nullptr) { + Command* marker = new Marker(*this, true); + if (marker != nullptr) { + marker->enqueue(); + marker->awaitCompletion(); + marker->release(); + } } thread_.Release(); thread_.acceptingCommands_ = false;