From 7d5b4a8f7a7d34f008d65277f8aae4c98a6da375 Mon Sep 17 00:00:00 2001 From: kjayapra-amd Date: Thu, 25 Jan 2024 18:25:36 -0500 Subject: [PATCH] SWDEV-437832 - Changes to update host unified memory and iommuv2 flags. Change-Id: I88998cf57c21fc446fa28e250f826c607923670b --- hipamd/src/hip_device.cpp | 6 +++--- rocclr/device/device.hpp | 9 +++++---- rocclr/device/pal/paldevice.cpp | 2 ++ rocclr/device/rocm/rocdevice.cpp | 7 ++++--- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index b5d16ee40f..7f4dfb06af 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -401,8 +401,8 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, int device) { deviceProps.directManagedMemAccessFromHost = info.hmmDirectHostAccess_; deviceProps.canUseHostPointerForRegisteredMem = info.hostUnifiedMemory_; deviceProps.pageableMemoryAccess = info.hmmCpuMemoryAccessible_; - deviceProps.hostRegisterSupported = info.hostUnifiedMemory_; - deviceProps.pageableMemoryAccessUsesHostPageTables = info.hostUnifiedMemory_; + deviceProps.hostRegisterSupported = true; + deviceProps.pageableMemoryAccessUsesHostPageTables = info.iommuv2_; // Mem pool deviceProps.memoryPoolsSupported = HIP_MEM_POOL_SUPPORT; @@ -467,7 +467,7 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, int device) { deviceProps.timelineSemaphoreInteropSupported = 0; deviceProps.unifiedFunctionPointers = 0; - deviceProps.integrated = info.accelerator_; + deviceProps.integrated = info.hostUnifiedMemory_; *props = deviceProps; return hipSuccess; diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 2dc7b09ee6..5a98f9157c 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -412,10 +412,13 @@ struct Info : public amd::EmbeddedObject { // the device implement error correction. uint32_t errorCorrectionSupport_; - //! 
true if the device and the host have a unified memory subsystem and - // is false otherwise. + //! true if the device and the host share unified memory, false otherwise. uint32_t hostUnifiedMemory_; + //! true if the device and the host have a unified memory management subsystem and + // is false otherwise. + uint32_t iommuv2_; + //! Describes the resolution of device timer. size_t profilingTimerResolution_; @@ -624,8 +627,6 @@ struct Info : public amd::EmbeddedObject { //! global CU mask which will be applied to all queues created on this device std::vector globalCUMask_; - bool accelerator_; //!< Accelerator or discrete graphics card. - //! AQL Barrier Value Packet support bool aqlBarrierValue_; diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index a1e1247ce2..864a6c8e44 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -513,6 +513,8 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.hostUnifiedMemory_ = true; } + info_.iommuv2_ = palProp.gpuMemoryProperties.flags.iommuv2Support; + info_.profilingTimerResolution_ = 1; info_.profilingTimerOffset_ = amd::Os::offsetToEpochNanos(); info_.littleEndian_ = true; diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 15478fe5ad..361959f6a6 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -1425,6 +1425,7 @@ bool Device::populateOCLDeviceConstants() { if (agent_profile_ == HSA_PROFILE_FULL) { // full-profile = participating in coherent memory, // base-profile = NUMA based non-coherent memory info_.hostUnifiedMemory_ = true; + info_.iommuv2_ = true; } info_.memBaseAddrAlign_ = 8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ?
sizeof(int64_t[16]) : MEMOBJ_BASE_ADDR_ALIGN); @@ -1625,14 +1626,14 @@ bool Device::populateOCLDeviceConstants() { } if (amd::IS_HIP) { // Report atomics capability based on GFX IP, control on Hawaii - if (info_.hostUnifiedMemory_ || isa().versionMajor() >= 8) { + if (info_.iommuv2_ || isa().versionMajor() >= 8) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } else if (!settings().useLightning_) { // Report atomics capability based on GFX IP, control on Hawaii // and Vega10. - if (info_.hostUnifiedMemory_ || (isa().versionMajor() == 8)) { + if (info_.iommuv2_ || (isa().versionMajor() == 8)) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } @@ -1847,7 +1848,7 @@ bool Device::populateOCLDeviceConstants() { // Check if the device is APU if (hsa_flag_isset64(memory_properties, HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU)) { - info_.accelerator_ = 1; + info_.hostUnifiedMemory_ = 1; } return true;