diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp index bf15d99d92..7bff422b40 100644 --- a/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/rocclr/runtime/device/rocm/rocdevice.cpp @@ -1024,6 +1024,10 @@ bool Device::populateOCLDeviceConstants() { } assert(info_.maxComputeUnits_ > 0); + info_.maxComputeUnits_ = settings().enableWgpMode_ + ? info_.maxComputeUnits_ / 2 + : info_.maxComputeUnits_; + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE, &info_.globalMemCacheLineSize_)) { @@ -1406,10 +1410,8 @@ bool Device::populateOCLDeviceConstants() { info_.gfxipVersion_ = deviceInfo_.gfxipVersion_; info_.numAsyncQueues_ = kMaxAsyncQueues; info_.numRTQueues_ = info_.numAsyncQueues_; - if (HSA_STATUS_SUCCESS != - hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info_.numRTCUs_)) { - return false; - } + info_.numRTCUs_ = info_.maxComputeUnits_; + //TODO: set to true once thread trace support is available info_.threadTraceEnable_ = false; info_.pcieDeviceId_ = deviceInfo_.pciDeviceId_; diff --git a/rocclr/runtime/device/rocm/rocsettings.cpp b/rocclr/runtime/device/rocm/rocsettings.cpp index 277cf3c0cd..e6f77c0caf 100644 --- a/rocclr/runtime/device/rocm/rocsettings.cpp +++ b/rocclr/runtime/device/rocm/rocsettings.cpp @@ -126,8 +126,14 @@ bool Settings::create(bool fullProfile, int gfxipVersion) { } if (gfxipVersion >= 1000) { - lcWavefrontSize64_ = false; + enableWave32Mode_ = true; + enableWgpMode_ = GPU_ENABLE_WGP_MODE; } + if (!flagIsDefault(GPU_ENABLE_WAVE32_MODE)) { + enableWave32Mode_ = GPU_ENABLE_WAVE32_MODE; + } + + lcWavefrontSize64_ = !enableWave32Mode_; // Override current device settings override();