From d849b88aef60723fa97b48e9a06fe2a42414e5f4 Mon Sep 17 00:00:00 2001 From: AidanBeltonS Date: Wed, 26 Nov 2025 16:10:58 +0000 Subject: [PATCH] SWDEV-558080 - Add recommended granularity (#1176) * Add recommended granularity * Improve granularity testing * Update based on feedback --- .../clr/hipamd/src/hip_graph_internal.hpp | 8 +-- projects/clr/hipamd/src/hip_vm.cpp | 8 ++- projects/clr/rocclr/device/device.hpp | 3 +- projects/clr/rocclr/device/pal/paldevice.cpp | 4 +- projects/clr/rocclr/device/rocm/rocdevice.cpp | 11 +++- projects/clr/rocclr/platform/vmheap.cpp | 2 +- .../hipMemGetAllocationGranularity.cc | 57 ++++--------------- 7 files changed, 38 insertions(+), 55 deletions(-) diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp index 1acbc81e48..0539d5bd4b 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.hpp +++ b/projects/clr/hipamd/src/hip_graph_internal.hpp @@ -719,10 +719,10 @@ class Graph { void* ptr; const auto& dev_info = g_devices[0]->devices()[0]->info(); - size = amd::alignUp(size, dev_info.virtualMemAllocGranularity_); + size = amd::alignUp(size, dev_info.virtualMemAllocGranularityRecommended_); // Single virtual alloc would reserve for all devices. ptr = g_devices[0]->devices()[0]->virtualAlloc(startAddress, size, - dev_info.virtualMemAllocGranularity_); + dev_info.virtualMemAllocGranularityRecommended_); if (ptr == nullptr) { LogError("Failed to reserve Virtual Address"); } @@ -2412,7 +2412,7 @@ class GraphMemAllocNode final : public GraphNode { } // Allocate real memory for mapping const auto& dev_info = queue()->device().info(); - auto aligned_size = amd::alignUp(size_, dev_info.virtualMemAllocGranularity_); + auto aligned_size = amd::alignUp(size_, dev_info.virtualMemAllocGranularityRecommended_); auto dptr = graph_->AllocateMemory(aligned_size, static_cast(queue()), nullptr); if (dptr == nullptr) { setStatus(CL_INVALID_OPERATION); @@ -2609,7 +2609,7 @@ class GraphMemFreeNode : public GraphNode { // Unmap virtual address from memory amd::Command* cmd = new VirtualMemFreeNode( graph, stream->DeviceId(), *stream, amd::Command::EventWaitList{}, device_ptr_, - amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularity_), nullptr); + amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularityRecommended_), nullptr); commands_.push_back(cmd); ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_); } diff --git a/projects/clr/hipamd/src/hip_vm.cpp b/projects/clr/hipamd/src/hip_vm.cpp index 85a4c9beee..5b766e6d69 100644 --- a/projects/clr/hipamd/src/hip_vm.cpp +++ b/projects/clr/hipamd/src/hip_vm.cpp @@ -60,7 +60,7 @@ hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void* } const auto& dev_info = g_devices[0]->devices()[0]->info(); - if (size == 0 || ((size % dev_info.virtualMemAllocGranularity_) != 0) || + if (size == 0 || ((size % dev_info.virtualMemAllocGranularityMinimum_) != 0) || ((alignment & (alignment - 1)) != 0)) { HIP_RETURN(hipErrorInvalidValue); } @@ -228,7 +228,11 @@ hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAlloc amd::Context* amdContext = useHostDevice ? hip::host_context : curDevContext; const auto& dev_info = amdContext->devices()[0]->info(); - *granularity = dev_info.virtualMemAllocGranularity_; + if (option == hipMemAllocationGranularityMinimum) { + *granularity = dev_info.virtualMemAllocGranularityMinimum_; + } else { + *granularity = dev_info.virtualMemAllocGranularityRecommended_; + } HIP_RETURN(hipSuccess); } diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 6310094ab4..6d9a74a47e 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -650,7 +650,8 @@ struct Info : public amd::EmbeddedObject { bool pcie_atomics_; //!< Pcie atomics support flag bool virtualMemoryManagement_; //!< Virtual memory management support - size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity + size_t virtualMemAllocGranularityMinimum_; //!< minimum virtual memory allocation size/addr granularity + size_t virtualMemAllocGranularityRecommended_; //!< recommended virtual memory allocation size/addr granularity uint32_t driverNodeId_; //! Number of Physical SGPRs per SIMD diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index ab476b6a9b..c0ab9dfd47 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -630,7 +630,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, #endif // _WIN64 } info_.virtualMemoryManagement_ = true; - info_.virtualMemAllocGranularity_ = + info_.virtualMemAllocGranularityMinimum_ = + static_cast(palProp.gpuMemoryProperties.virtualMemAllocGranularity); + info_.virtualMemAllocGranularityRecommended_ = static_cast(palProp.gpuMemoryProperties.virtualMemAllocGranularity); info_.vgprAllocGranularity_ = palProp.gfxipProperties.shaderCore.vgprAllocGranularity; info_.vgprsPerSimd_ = palProp.gfxipProperties.shaderCore.vgprsPerSimd; diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 38abb065c8..75020e2af8 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -792,13 +792,22 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo // Query the recommended granularity for this pool. stat = Hsa::memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, - &(dev->info_.virtualMemAllocGranularity_)); + &(dev->info_.virtualMemAllocGranularityMinimum_)); if (stat != HSA_STATUS_SUCCESS) { LogPrintfError( "Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE info" "failed with hsa_status: %d \n", stat); } + // Query the recommended granularity for this pool. + stat = Hsa::memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE, + &(dev->info_.virtualMemAllocGranularityRecommended_)); + if (stat != HSA_STATUS_SUCCESS) { + LogPrintfError( + "Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE info" + "failed with hsa_status: %d \n", + stat); + } } if (dev->gpuvm_segment_.handle == 0) { diff --git a/projects/clr/rocclr/platform/vmheap.cpp b/projects/clr/rocclr/platform/vmheap.cpp index c9740a0361..f08a33eaff 100644 --- a/projects/clr/rocclr/platform/vmheap.cpp +++ b/projects/clr/rocclr/platform/vmheap.cpp @@ -51,7 +51,7 @@ bool VmHeap::ReleaseAddressRange(void* addr) { // ================================================================================================ bool VmHeap::CommitMemory(void* addr, size_t size) { const auto& dev_info = device_->info(); - size_t granularity = dev_info.virtualMemAllocGranularity_; + size_t granularity = dev_info.virtualMemAllocGranularityRecommended_; auto padded_size = alignUp(size, granularity); // Allocate physical memory diff --git a/projects/hip-tests/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc b/projects/hip-tests/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc index bb19773f92..b84557968a 100644 --- a/projects/hip-tests/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc +++ b/projects/hip-tests/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc @@ -47,48 +47,6 @@ void getGranularity(size_t* granularity, hipMemAllocationGranularity_flags optio HIP_CHECK(hipMemGetAllocationGranularity(granularity, &prop, option)); } -/** - * Test Description - * ------------------------ - * - Functional Test to get granularity size for - * hipMemAllocationGranularityMinimum option. - * ------------------------ - * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.1 - */ -TEST_CASE("Unit_hipMemGetAllocationGranularity_MinGranularity") { - HIP_CHECK(hipFree(0)); - size_t granularity = 0; - hipDevice_t device; - HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device); - getGranularity(&granularity, hipMemAllocationGranularityMinimum, 0); - REQUIRE(granularity > 0); -} - -/** - * Test Description - * ------------------------ - * - Functional Test to get granularity size for - * hipMemAllocationGranularityRecommended option. - * ------------------------ - * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.1 - */ -TEST_CASE("Unit_hipMemGetAllocationGranularity_RecommendedGranularity") { - HIP_CHECK(hipFree(0)); - size_t granularity = 0; - hipDevice_t device; - HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device); - getGranularity(&granularity, hipMemAllocationGranularityRecommended, 0); - REQUIRE(granularity > 0); -} - /** * Test Description * ------------------------ @@ -105,12 +63,21 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_AllGPUs") { int numDevices = 0; HIP_CHECK(hipGetDeviceCount(&numDevices)); for (int dev = 0; dev < numDevices; dev++) { - size_t granularity = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, dev)); checkVMMSupported(device); - getGranularity(&granularity, hipMemAllocationGranularityRecommended, dev); - REQUIRE(granularity > 0); + + size_t min_granularity = 0; + size_t recommended_granularity = 0; + + getGranularity(&min_granularity, hipMemAllocationGranularityMinimum, dev); + REQUIRE(min_granularity >= 1024); + + getGranularity(&recommended_granularity, hipMemAllocationGranularityRecommended, dev); + REQUIRE(recommended_granularity >= 1024); + + // Check the recommended_granularity is greater than or equal to the minimum + REQUIRE(recommended_granularity >= min_granularity); } }