From 0ba5a01baa367c58361170521ed8f24dfc4798fb Mon Sep 17 00:00:00 2001 From: Sunday Clement <83687182+Sundance636@users.noreply.github.com> Date: Wed, 21 Jan 2026 16:52:15 -0500 Subject: [PATCH] rocr: SVMPrefetch to a particular numa node (#1063) In order for hipMemPrefetchAsync_v2() api to work, we need rocr to migrate the ranges of pages requested to the particular NUMA node in question, via move_pages(). Signed-off-by: Sunday Clement --- projects/rocr-runtime/CMakeLists.txt | 9 +++++- .../hsa-runtime/core/runtime/runtime.cpp | 28 +++++++++++++++---- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/projects/rocr-runtime/CMakeLists.txt b/projects/rocr-runtime/CMakeLists.txt index 44850dd7cf..794beef3cb 100644 --- a/projects/rocr-runtime/CMakeLists.txt +++ b/projects/rocr-runtime/CMakeLists.txt @@ -60,6 +60,13 @@ if (NOT DEFINED BUILD_ROCR) set(BUILD_ROCR ON) endif() +find_package(NUMA) +if(NUMA_FOUND) + set(NUMA "${NUMA_LIBRARIES}") +else() + find_library(NUMA NAMES numa REQUIRED) +endif() + function(add_rocm_subdir subdir subdir_assigns) message("add_rocm_subdir() -- " ${subdir}) # message(" subdir_assigns before:" ${subdir_assigns} "EOM") @@ -238,7 +245,7 @@ set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core, set(CPACK_DEBIAN_DEV_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev") -set(CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS "libdrm-amdgpu-amdgpu1 | libdrm-amdgpu1, libnuma1, libelf1") +set(CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS "libdrm-amdgpu-amdgpu1 | libdrm-amdgpu1, libnuma1, numactl, libelf1") set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core-asan, libdrm-amdgpu-amdgpu1 | libdrm-amdgpu1, libnuma1, libelf1") set(CPACK_DEBIAN_ASAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev") diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 4fbf5aa145..b2cae0833a 100644 --- 
a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -56,6 +56,7 @@ #include #include #include +#include // for move_pages #else #define debug_warning(__VA_ARGS__) #endif @@ -3354,12 +3355,29 @@ hsa_status_t Runtime::SvmPrefetch(void* ptr, size_t size, hsa_agent_t agent, op->dep_signals[op->remaining_deps], HSA_SIGNAL_CONDITION_EQ, 0, signal_handler, arg); return false; } + + Agent* dest = Runtime::runtime_singleton_->GetSVMPrefetchAgent(op->base, op->size); - HSA_SVM_ATTRIBUTE attrib; - attrib.type = HSA_SVM_ATTR_PREFETCH_LOC; - attrib.value = op->node_id; - HSAKMT_STATUS error = HSAKMT_CALL(hsaKmtSVMSetAttr(op->base, op->size, 1, &attrib)); - assert(error == HSAKMT_STATUS_SUCCESS && "KFD Prefetch failed."); + if (dest == nullptr || dest->device_type() == Agent::kAmdGpuDevice) { + // Prefetch location is not valid for move_pages usecase. + HSA_SVM_ATTRIBUTE attrib; + attrib.type = HSA_SVM_ATTR_PREFETCH_LOC; + attrib.value = op->node_id; + HSAKMT_STATUS error = HSAKMT_CALL(hsaKmtSVMSetAttr(op->base, op->size, 1, &attrib)); + assert(error == HSAKMT_STATUS_SUCCESS && "KFD Prefetch failed."); + } else { + // Migrate pages to the requested CPU NUMA node + void* base_ptr = op->base; + size_t num_pages = op->size / 4096; + std::vector pages(num_pages); + for (size_t i = 0; i < num_pages; ++i) + pages[i] = static_cast(base_ptr) + i * 4096; + std::vector nodes(num_pages, op->node_id); + std::vector status(num_pages, -1); + + int ret = move_pages(0, num_pages, pages.data(), nodes.data(), status.data(), 0); + assert(ret == 0 && "move_pages failed"); + } removePrefetchRanges(op);