SWDEV-301947 - Use new enum for CU count

Use HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT to get the compute
unit count. This is needed to work around an asymmetric CU harvesting bug
on gfx90a. Add a new device property to get the max available CUs on the
device.

Change-Id: I878f38f14f16c1af01fc0a77157aea1e816a63b8


[ROCm/clr commit: 33aca5a4a6]
이 커밋은 다음에 포함됨:
Saleel Kudchadker
2022-01-27 15:48:27 -08:00
부모 7929d01be3
커밋 d0e8d72bfa
4개의 변경된 파일, 20개의 추가 그리고 4개의 삭제
+4 -1
파일 보기
@@ -234,9 +234,12 @@ struct Info : public amd::EmbeddedObject {
//! A unique device vendor identifier.
uint32_t vendorId_;
//! The number of parallel compute cores on the compute device.
//! The available number of parallel compute cores on the compute device.
uint32_t maxComputeUnits_;
//! The max number of parallel compute cores on the compute device.
uint32_t maxBoostComputeUnits_;
//! Maximum dimensions that specify the global and local work-item IDs
// used by the data-parallel execution model.
uint32_t maxWorkItemDimensions_;
+1
파일 보기
@@ -364,6 +364,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
info_.type_ = CL_DEVICE_TYPE_GPU;
info_.vendorId_ = 0x1002;
info_.maxComputeUnits_ = calAttr.numberOfSIMD;
info_.maxBoostComputeUnits_ = calAttr.numberOfSIMD;
info_.maxWorkItemDimensions_ = 3;
info_.numberOfShaderEngines = calAttr.numberOfShaderEngines;
+1 -1
파일 보기
@@ -320,7 +320,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.maxComputeUnits_ = settings().enableWgpMode_
? palProp.gfxipProperties.shaderCore.numAvailableCus / 2
: palProp.gfxipProperties.shaderCore.numAvailableCus;
info_.maxBoostComputeUnits_ = info_.maxComputeUnits_;
info_.numberOfShaderEngines = palProp.gfxipProperties.shaderCore.numShaderEngines;
// SI parts are scalar. Also, reads don't need to be 128-bits to get peak rates.
+14 -2
파일 보기
@@ -1076,7 +1076,8 @@ bool Device::populateOCLDeviceConstants() {
}
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
hsa_agent_get_info(_bkendDevice,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT,
&info_.maxComputeUnits_)) {
return false;
}
@@ -1086,6 +1087,17 @@ bool Device::populateOCLDeviceConstants() {
? info_.maxComputeUnits_ / 2
: info_.maxComputeUnits_;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
&info_.maxBoostComputeUnits_)) {
return false;
}
assert(info_.maxBoostComputeUnits_ > 0);
info_.maxBoostComputeUnits_ = settings().enableWgpMode_
? info_.maxBoostComputeUnits_ / 2
: info_.maxBoostComputeUnits_;
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
&info_.globalMemCacheLineSize_)) {
@@ -2106,7 +2118,7 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset,
void* orig_dev_ptr = nullptr;
// Retrieve the devPtr from the handle
hsa_status_t hsa_status =
hsa_status_t hsa_status =
hsa_amd_ipc_memory_attach(reinterpret_cast<const hsa_amd_ipc_memory_t*>(handle),
mem_size, (1 + p2p_agents_.size()), p2p_agents_list_,
&orig_dev_ptr);