Handle devices whose HDP flush register address is unavailable.

Summary of the hunks below:
  * hip_graph_internal.cpp - GraphExec::CaptureAQLPackets() writes and reads
    back hdpMemFlushCntl only when the pointer is non-null. When it is null,
    an amd::Marker is created on capture_stream_, enqueued and released.
    NOTE(review): presumably the marker stands in for the register flush;
    confirm against amd::Marker semantics.
  * rocdevice.cpp - Device::create() now calls setupCpuAgent() (removed from
    populateOCLDeviceConstants()) and queries HSA_AMD_AGENT_INFO_HDP_FLUSH
    before the settings are created. On a non-XGMI device with a null
    hdpMemFlushCntl or hdpRegFlushCntl it logs a warning and passes
    device_kernel_args = false to Settings::create().
  * rocsettings.cpp / rocsettings.hpp - Settings::create() gains a trailing
    `bool device_kernel_args` parameter (default true) that is AND-ed with
    HIP_FORCE_DEV_KERNARG for gfx 9.4 steppings 0, 1 and 2.
  * rocvirtual.cpp - VirtualGPU::submitKernelInternal() additionally requires
    roc_device_.info().largeBar_ for pcieKernargs.

diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp
index d35b810acc..ec42414f8d 100644
--- a/projects/clr/hipamd/src/hip_graph_internal.cpp
+++ b/projects/clr/hipamd/src/hip_graph_internal.cpp
@@ -396,9 +396,17 @@ hipError_t GraphExec::CaptureAQLPackets() {
   }
 
   if (device_kernarg_pool_ && !device->isXgmi()) {
-    *device->info().hdpMemFlushCntl = 1u;
-    if (*device->info().hdpMemFlushCntl != UINT32_MAX) {
-      LogError("Unexpected HDP Register readback value!");
+    if (device->info().hdpMemFlushCntl != nullptr) {
+      *device->info().hdpMemFlushCntl = 1u;
+      if (*device->info().hdpMemFlushCntl != UINT32_MAX) {
+        LogError("Unexpected HDP Register readback value!");
+      }
+    } else {
+      amd::Command* command = new amd::Marker(*capture_stream_, true);
+      if (command != nullptr) {
+        command->enqueue();
+        command->release();
+      }
     }
   }
 
diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp
index c4afdf9194..5eee23e650 100644
--- a/projects/clr/rocclr/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp
@@ -705,6 +705,27 @@ bool Device::create() {
     return false;
   }
 
+  setupCpuAgent();
+
+  // Get Agent HDP Flush Register Memory
+  hsa_amd_hdp_flush_t hdpInfo;
+  if (HSA_STATUS_SUCCESS !=
+      hsa_agent_get_info(bkendDevice_,
+                         static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) {
+    LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name);
+    return false;
+  }
+
+  info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL;
+  info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL;
+
+  bool device_kernel_args = true;
+  if (!isXgmi_ && ((info_.hdpMemFlushCntl == nullptr) || (info_.hdpRegFlushCntl == nullptr))) {
+    LogWarning("Unable to determine HDP flush register address. "
+               "Device kernel arguments are not supported");
+    device_kernel_args = false;
+  }
+
   // Create HSA settings
   assert(!settings_);
   roc::Settings* hsaSettings = new roc::Settings();
@@ -713,7 +734,7 @@ bool Device::create() {
       !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(),
                            isa->versionMinor(), isa->versionStepping(),
                            isa->xnack() == amd::Isa::Feature::Enabled,
-                           coop_groups)) {
+                           coop_groups, device_kernel_args)) {
     LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name,
                    pciDeviceId_);
     return false;
@@ -754,17 +775,6 @@ bool Device::create() {
   }
   info_.pciDomainID = pci_domain_id;
 
-  // Get Agent HDP Flush Register Memory
-  hsa_amd_hdp_flush_t hdpInfo;
-  if (HSA_STATUS_SUCCESS !=
-      hsa_agent_get_info(bkendDevice_,
-                         static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) {
-    LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name);
-    return false;
-  }
-  info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL;
-  info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL;
-
   if (populateOCLDeviceConstants() == false) {
     LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name,
                    pciDeviceId_);
@@ -1257,7 +1267,6 @@ bool Device::populateOCLDeviceConstants() {
     engineAssignMap_[1 << i] = 0;
   }
 
-  setupCpuAgent();
 
   checkAtomicSupport();
 
diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp
index 3f479ad833..a1688794ac 100644
--- a/projects/clr/rocclr/device/rocm/rocsettings.cpp
+++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp
@@ -102,7 +102,8 @@ Settings::Settings() {
 
 // ================================================================================================
 bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor,
-                      uint32_t gfxStepping, bool enableXNACK, bool coop_groups) {
+                      uint32_t gfxStepping, bool enableXNACK, bool coop_groups,
+                      bool device_kernel_args) {
   customHostAllocator_ = false;
 
   if (fullProfile) {
@@ -168,7 +169,7 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor
   // Enable device kernel args for MI300* for now
   if (gfxipMajor == 9 && gfxipMinor == 4 &&
       (gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2)) {
-    device_kernel_args_ = HIP_FORCE_DEV_KERNARG;
+    device_kernel_args_ = HIP_FORCE_DEV_KERNARG && device_kernel_args;
   }
 
   if (gfxipMajor >= 10) {
diff --git a/projects/clr/rocclr/device/rocm/rocsettings.hpp b/projects/clr/rocclr/device/rocm/rocsettings.hpp
index bd68da8fa8..4b6e384c48 100644
--- a/projects/clr/rocclr/device/rocm/rocsettings.hpp
+++ b/projects/clr/rocclr/device/rocm/rocsettings.hpp
@@ -84,7 +84,8 @@ class Settings : public device::Settings {
 
   //! Creates settings
   bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxStepping,
-              bool enableXNACK, bool coop_groups = false);
+              bool enableXNACK, bool coop_groups = false,
+              bool device_kernel_args = true);
 
  private:
   //! Disable copy constructor
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
index 6bcf44d213..fd9bb7faaf 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
@@ -3210,7 +3210,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
     }
   }
 
-  const auto pcieKernargs = !dev().isXgmi() && dev().settings().device_kernel_args_;
+  const auto pcieKernargs = !dev().isXgmi() &&
+                            dev().settings().device_kernel_args_ &&
+                            roc_device_.info().largeBar_;
   address argBuffer = hidden_arguments;
   bool isGraphCapture = vcmd != nullptr && vcmd->getCapturingState();
 