SWDEV-451166 - Disable kernel args for non-XGMI if HDP flush register is invalid

Change-Id: I227e046e2b9cb25476a50240f5d070adbd558f21


[ROCm/clr commit: 96f5c44851]
This commit is contained in:
Ioannis Assiouras
2024-03-13 22:50:58 +00:00
parent ec391d7249
commit b46d3c0f8d
5 files changed, with 41 additions and 20 deletions
@@ -396,9 +396,17 @@ hipError_t GraphExec::CaptureAQLPackets() {
}
if (device_kernarg_pool_ && !device->isXgmi()) {
*device->info().hdpMemFlushCntl = 1u;
if (*device->info().hdpMemFlushCntl != UINT32_MAX) {
LogError("Unexpected HDP Register readback value!");
if (device->info().hdpMemFlushCntl != nullptr) {
*device->info().hdpMemFlushCntl = 1u;
if (*device->info().hdpMemFlushCntl != UINT32_MAX) {
LogError("Unexpected HDP Register readback value!");
}
} else {
amd::Command* command = new amd::Marker(*capture_stream_, true);
if (command != nullptr) {
command->enqueue();
command->release();
}
}
}
@@ -705,6 +705,27 @@ bool Device::create() {
return false;
}
setupCpuAgent();
// Get Agent HDP Flush Register Memory
hsa_amd_hdp_flush_t hdpInfo;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) {
LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name);
return false;
}
info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL;
info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL;
bool device_kernel_args = true;
if (!isXgmi_ && ((info_.hdpMemFlushCntl == nullptr) || (info_.hdpRegFlushCntl == nullptr))) {
LogWarning("Unable to determine HDP flush register address. "
"Device kernel arguments are not supported");
device_kernel_args = false;
}
// Create HSA settings
assert(!settings_);
roc::Settings* hsaSettings = new roc::Settings();
@@ -713,7 +734,7 @@ bool Device::create() {
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(),
isa->versionMinor(), isa->versionStepping(),
isa->xnack() == amd::Isa::Feature::Enabled,
coop_groups)) {
coop_groups, device_kernel_args)) {
LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
@@ -754,17 +775,6 @@ bool Device::create() {
}
info_.pciDomainID = pci_domain_id;
// Get Agent HDP Flush Register Memory
hsa_amd_hdp_flush_t hdpInfo;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(bkendDevice_,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) {
LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name);
return false;
}
info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL;
info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL;
if (populateOCLDeviceConstants() == false) {
LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
@@ -1257,7 +1267,6 @@ bool Device::populateOCLDeviceConstants() {
engineAssignMap_[1 << i] = 0;
}
setupCpuAgent();
checkAtomicSupport();
@@ -102,7 +102,8 @@ Settings::Settings() {
// ================================================================================================
bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor,
uint32_t gfxStepping, bool enableXNACK, bool coop_groups) {
uint32_t gfxStepping, bool enableXNACK, bool coop_groups,
bool device_kernel_args) {
customHostAllocator_ = false;
if (fullProfile) {
@@ -168,7 +169,7 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor
// Enable device kernel args for MI300* for now
if (gfxipMajor == 9 && gfxipMinor == 4 &&
(gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2)) {
device_kernel_args_ = HIP_FORCE_DEV_KERNARG;
device_kernel_args_ = HIP_FORCE_DEV_KERNARG && device_kernel_args;
}
if (gfxipMajor >= 10) {
@@ -84,7 +84,8 @@ class Settings : public device::Settings {
//! Creates settings
bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxStepping,
bool enableXNACK, bool coop_groups = false);
bool enableXNACK, bool coop_groups = false,
bool device_kernel_args = true);
private:
//! Disable copy constructor
@@ -3210,7 +3210,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
}
}
const auto pcieKernargs = !dev().isXgmi() && dev().settings().device_kernel_args_;
const auto pcieKernargs = !dev().isXgmi() &&
dev().settings().device_kernel_args_ &&
roc_device_.info().largeBar_;
address argBuffer = hidden_arguments;
bool isGraphCapture = vcmd != nullptr && vcmd->getCapturingState();