SWDEV-301947 - Use new enum for CU count
Use HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT to get compute
units. This is needed to work around an asymmetric CU harvesting bug on
gfx90a. Add a new device property to get the max available CUs on the
device.
Change-Id: I878f38f14f16c1af01fc0a77157aea1e816a63b8
[ROCm/clr commit: 33aca5a4a6]
이 커밋은 다음에 포함됨:
@@ -234,9 +234,12 @@ struct Info : public amd::EmbeddedObject {
|
||||
//! A unique device vendor identifier.
|
||||
uint32_t vendorId_;
|
||||
|
||||
//! The number of parallel compute cores on the compute device.
|
||||
//! The available number of parallel compute cores on the compute device.
|
||||
uint32_t maxComputeUnits_;
|
||||
|
||||
//! The max number of parallel compute cores on the compute device.
|
||||
uint32_t maxBoostComputeUnits_;
|
||||
|
||||
//! Maximum dimensions that specify the global and local work-item IDs
|
||||
// used by the data-parallel execution model.
|
||||
uint32_t maxWorkItemDimensions_;
|
||||
|
||||
@@ -364,6 +364,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
|
||||
info_.type_ = CL_DEVICE_TYPE_GPU;
|
||||
info_.vendorId_ = 0x1002;
|
||||
info_.maxComputeUnits_ = calAttr.numberOfSIMD;
|
||||
info_.maxBoostComputeUnits_ = calAttr.numberOfSIMD;
|
||||
info_.maxWorkItemDimensions_ = 3;
|
||||
info_.numberOfShaderEngines = calAttr.numberOfShaderEngines;
|
||||
|
||||
|
||||
@@ -320,7 +320,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
info_.maxComputeUnits_ = settings().enableWgpMode_
|
||||
? palProp.gfxipProperties.shaderCore.numAvailableCus / 2
|
||||
: palProp.gfxipProperties.shaderCore.numAvailableCus;
|
||||
|
||||
info_.maxBoostComputeUnits_ = info_.maxComputeUnits_;
|
||||
info_.numberOfShaderEngines = palProp.gfxipProperties.shaderCore.numShaderEngines;
|
||||
|
||||
// SI parts are scalar. Also, reads don't need to be 128-bits to get peak rates.
|
||||
|
||||
@@ -1076,7 +1076,8 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
|
||||
hsa_agent_get_info(_bkendDevice,
|
||||
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT,
|
||||
&info_.maxComputeUnits_)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1086,6 +1087,17 @@ bool Device::populateOCLDeviceConstants() {
|
||||
? info_.maxComputeUnits_ / 2
|
||||
: info_.maxComputeUnits_;
|
||||
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
|
||||
&info_.maxBoostComputeUnits_)) {
|
||||
return false;
|
||||
}
|
||||
assert(info_.maxBoostComputeUnits_ > 0);
|
||||
|
||||
info_.maxBoostComputeUnits_ = settings().enableWgpMode_
|
||||
? info_.maxBoostComputeUnits_ / 2
|
||||
: info_.maxBoostComputeUnits_;
|
||||
|
||||
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice,
|
||||
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
|
||||
&info_.globalMemCacheLineSize_)) {
|
||||
@@ -2106,7 +2118,7 @@ bool Device::IpcAttach(const void* handle, size_t mem_size, size_t mem_offset,
|
||||
void* orig_dev_ptr = nullptr;
|
||||
|
||||
// Retrieve the devPtr from the handle
|
||||
hsa_status_t hsa_status =
|
||||
hsa_status_t hsa_status =
|
||||
hsa_amd_ipc_memory_attach(reinterpret_cast<const hsa_amd_ipc_memory_t*>(handle),
|
||||
mem_size, (1 + p2p_agents_.size()), p2p_agents_list_,
|
||||
&orig_dev_ptr);
|
||||
|
||||
새 이슈에서 참조
사용자 차단