From ee124dda09f666f214f5ffbd6194e23293bb02be Mon Sep 17 00:00:00 2001 From: "Chaudhary, Jatin Jaikishan" Date: Mon, 11 Aug 2025 05:47:18 +0100 Subject: [PATCH 1/2] SWDEV-508540 - fix the max blocks per multiprocessor calculation (#737) --- hipamd/src/hip_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index fef71fb7b7..8f1ceff2fb 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -499,7 +499,7 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, int device) { deviceProps.multiProcessorCount = info.maxComputeUnits_; deviceProps.l2CacheSize = info.l2CacheSize_; deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; - deviceProps.maxBlocksPerMultiProcessor = int(info.maxThreadsPerCU_ / info.maxWorkGroupSize_); + deviceProps.maxBlocksPerMultiProcessor = int(info.maxThreadsPerCU_ / info.wavefrontWidth_); deviceProps.computeMode = 0; deviceProps.clockInstructionRate = info.timeStampFrequency_; deviceProps.arch.hasGlobalInt32Atomics = 1; From 926eedc23089f1963f56d7220764d45fe748c792 Mon Sep 17 00:00:00 2001 From: "Chaudhary, Jatin Jaikishan" Date: Mon, 11 Aug 2025 05:47:27 +0100 Subject: [PATCH 2/2] SWDEV-508540 - adjust for compile time wavefront 64 (#747) --- rocclr/device/rocm/rockernel.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rocclr/device/rocm/rockernel.cpp b/rocclr/device/rocm/rockernel.cpp index 5682b51b52..8fc79be382 100644 --- a/rocclr/device/rocm/rockernel.cpp +++ b/rocclr/device/rocm/rockernel.cpp @@ -132,8 +132,12 @@ bool Kernel::postLoad() { } } - uint32_t wavefront_size = 0; - if (hsa_agent_get_info(program()->rocDevice().getBackendDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, + // This can be set in code object and the value might be different than what HSA reports + // For example on Navi GPUs someone using -mwavefrontsize64 + // We set the value to HSA if the value is uninitialized + uint32_t wavefront_size = workGroupInfo_.wavefrontPerSIMD_; + if (wavefront_size == 0 && + hsa_agent_get_info(program()->rocDevice().getBackendDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) != HSA_STATUS_SUCCESS) { DevLogPrintfError("[ROC][Kernel] Cannot get Wavefront Size, failed with hsa_status: %d \n", hsaStatus); @@ -169,7 +173,6 @@ bool Kernel::postLoad() { workGroupInfo_.preferredSizeMultiple_ = wavefront_size; workGroupInfo_.usedStackSize_ = kernelHasDynamicCallStack_; workGroupInfo_.wavefrontPerSIMD_ = program()->rocDevice().info().maxWorkItemSizes_[0] / wavefront_size; - workGroupInfo_.wavefrontSize_ = wavefront_size; workGroupInfo_.constMemSize_ = const_size_bytes; workGroupInfo_.maxDynamicSharedSizeBytes_ = static_cast(workGroupInfo_.availableLDSSize_ - workGroupInfo_.localMemSize_);