diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 0c298977d8..f884184381 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -234,9 +234,12 @@ struct Info : public amd::EmbeddedObject { //! A unique device vendor identifier. uint32_t vendorId_; - //! The number of parallel compute cores on the compute device. + //! The available number of parallel compute cores on the compute device. uint32_t maxComputeUnits_; + //! The max number of parallel compute cores on the compute device. + uint32_t maxBoostComputeUnits_; + //! Maximum dimensions that specify the global and local work-item IDs // used by the data-parallel execution model. uint32_t maxWorkItemDimensions_; diff --git a/projects/clr/rocclr/device/gpu/gpudevice.cpp b/projects/clr/rocclr/device/gpu/gpudevice.cpp index dde49631ec..6aab22a46a 100644 --- a/projects/clr/rocclr/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/device/gpu/gpudevice.cpp @@ -364,6 +364,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.type_ = CL_DEVICE_TYPE_GPU; info_.vendorId_ = 0x1002; info_.maxComputeUnits_ = calAttr.numberOfSIMD; + info_.maxBoostComputeUnits_ = calAttr.numberOfSIMD; info_.maxWorkItemDimensions_ = 3; info_.numberOfShaderEngines = calAttr.numberOfShaderEngines; diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 56f1f0acf7..86803708f2 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -320,7 +320,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.maxComputeUnits_ = settings().enableWgpMode_ ? palProp.gfxipProperties.shaderCore.numAvailableCus / 2 : palProp.gfxipProperties.shaderCore.numAvailableCus; - + info_.maxBoostComputeUnits_ = info_.maxComputeUnits_; info_.numberOfShaderEngines = palProp.gfxipProperties.shaderCore.numShaderEngines; // SI parts are scalar. Also, reads don't need to be 128-bits to get peak rates. diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index dd10f7eefc..d3c11f6fc0 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -1076,7 +1076,8 @@ bool Device::populateOCLDeviceConstants() { } if (HSA_STATUS_SUCCESS != - hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, + hsa_agent_get_info(_bkendDevice, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT, &info_.maxComputeUnits_)) { return false; } @@ -1086,6 +1087,17 @@ bool Device::populateOCLDeviceConstants() { ? info_.maxComputeUnits_ / 2 : info_.maxComputeUnits_; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, + &info_.maxBoostComputeUnits_)) { + return false; + } + assert(info_.maxBoostComputeUnits_ > 0); + + info_.maxBoostComputeUnits_ = settings().enableWgpMode_ + ? info_.maxBoostComputeUnits_ / 2 + : info_.maxBoostComputeUnits_; + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE, &info_.globalMemCacheLineSize_)) { @@ -2106,7 +2118,7 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset, void* orig_dev_ptr = nullptr; // Retrieve the devPtr from the handle - hsa_status_t hsa_status = + hsa_status_t hsa_status = hsa_amd_ipc_memory_attach(reinterpret_cast(handle), mem_size, (1 + p2p_agents_.size()), p2p_agents_list_, &orig_dev_ptr);