diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 2c62a2b2f5..acdd3349f9 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -1444,15 +1444,13 @@ bool Device::populateOCLDeviceConstants() { } if (settings().checkExtension(ClAmdDeviceAttributeQuery)) { - info_.simdPerCU_ = settings().enableWgpMode_ - ? (2 * isa().simdPerCU()) - : isa().simdPerCU(); info_.simdWidth_ = isa().simdWidth(); info_.simdInstructionWidth_ = isa().simdInstructionWidth(); if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_WAVEFRONT_SIZE, &info_.wavefrontWidth_)) { return false; } + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, static_cast(HSA_AMD_AGENT_INFO_MEMORY_WIDTH), @@ -1460,7 +1458,14 @@ bool Device::populateOCLDeviceConstants() { return false; } - uint32_t max_waves_per_cu; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(_bkendDevice, + static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &info_.simdPerCU_)) { + return false; + } + + uint32_t max_waves_per_cu = 0; if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), @@ -1468,6 +1473,11 @@ bool Device::populateOCLDeviceConstants() { return false; } + if (settings().enableWgpMode_) { + info_.simdPerCU_ *= 2; + max_waves_per_cu *= 2; + } + info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * max_waves_per_cu; uint32_t cache_sizes[4]; /* FIXIT [skudchad] - Seems like hardcoded in HSA backend so 0*/