diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp
index cfe3dec06e..93b79a0876 100644
--- a/projects/clr/rocclr/device/device.hpp
+++ b/projects/clr/rocclr/device/device.hpp
@@ -603,6 +603,8 @@ struct Info : public amd::EmbeddedObject {
 
   //! AQL Barrier Value Packet support
   bool aqlBarrierValue_;
+
+  bool pcie_atomics_;  //!< True when PCIe atomics are reported as supported; gates hostcall buffer setup
 };
 
 //! Device settings
diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp
index d8eae84e98..6c71cce387 100644
--- a/projects/clr/rocclr/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp
@@ -207,6 +207,16 @@ void Device::setupCpuAgent() {
           system_segment_.handle, system_coarse_segment_.handle, _bkendDevice.handle);
 }
 
+void Device::checkAtomicSupport() {  // Records link atomic support in info_.pcie_atomics_
+  std::vector link_attrs;  // NOTE(review): element type not visible here (template args lost?) - confirm
+  link_attrs.push_back(std::make_pair(LinkAttribute::kLinkAtomicSupport, 0));  // single query slot, starts at 0
+  if (findLinkInfo(system_segment_, &link_attrs)) {  // truthy return presumably means the query succeeded
+    if (link_attrs[0].second == 1) {  // only the value 1 is accepted as "supported"
+      info_.pcie_atomics_ = true;  // never written as false here; relies on the flag's initial value
+    }
+  }
+}
+
 Device::~Device() {
 #ifdef WITH_AMDGPU_PRO
   delete pro_device_;
@@ -1167,6 +1177,8 @@ bool Device::populateOCLDeviceConstants() {
 
   setupCpuAgent();
 
+  checkAtomicSupport();  // NOTE(review): reads system_segment_ before the assert below checks its handle
+
   assert(system_segment_.handle != 0);
   if (HSA_STATUS_SUCCESS != hsa_amd_agent_iterate_memory_pools(
                                 _bkendDevice, Device::iterateGpuMemoryPoolCallback, this)) {
diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp
index 3f080742bf..43c235826d 100644
--- a/projects/clr/rocclr/device/rocm/rocdevice.hpp
+++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp
@@ -374,6 +374,9 @@ class Device : public NullDevice {
   static const std::vector& getCpuAgents() { return cpu_agents_; }
 
   void setupCpuAgent();  // Setup the CPU agent which has the least NUMA distance to this GPU
+
+  void checkAtomicSupport();  //!< Query PCIe atomics support and record it in info_.pcie_atomics_
+
   //! Destructor for the physical HSA device
   virtual ~Device();
 
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
index abbd26b6a5..00283a5562 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
@@ -2781,7 +2781,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
           break;
         }
         case amd::KernelParameterDescriptor::HiddenHostcallBuffer: {
-          if (amd::IS_HIP) {
+          if (roc_device_.info().pcie_atomics_) {  // gate is now the device flag; amd::IS_HIP is no longer checked
             uintptr_t buffer = reinterpret_cast(
                 roc_device_.getOrCreateHostcallBuffer(gpu_queue_, coopGroups, cuMask_));
             if (!buffer) {
@@ -2790,6 +2790,10 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
               return false;
             }
             WriteAqlArgAt(hidden_arguments, buffer, it.size_, it.offset_);
+          } else {  // flag is false: no hostcall buffer is created or written
+            ClPrint(amd::LOG_ERROR, amd::LOG_KERN,
+                    "Pcie atomics not enabled, printf not supported");
+            return false;  // submission fails for any kernel carrying this hidden argument
           }
           break;
         }