[ROCm/rocshmem commit: 80f0a39866]
Этот коммит содержится в:
Yiltan
2025-11-07 10:12:11 -05:00
коммит произвёл GitHub
родитель 3c25349ec1
Коммит 740cbe6098
5 изменённых файлов: 216 добавлений и 18 удалений
+1 -6
Просмотреть файл
@@ -35,16 +35,11 @@ target_sources(
ibv_wrapper.cpp
gda_team.cpp
queue_pair.cpp
numa_wrapper.cpp
endian.cpp
topology.cpp
)
target_link_libraries(
${PROJECT_NAME}
PUBLIC
numa
)
add_subdirectory(mlx5)
add_subdirectory(bnxt)
add_subdirectory(ionic)
+116
Просмотреть файл
@@ -0,0 +1,116 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "numa_wrapper.hpp"
#include "util.hpp"
#include "rocshmem/rocshmem.hpp"
#include <dlfcn.h>
#include <numa.h> // If not found, try installing libnuma-dev (e.g apt-get install libnuma-dev)
#include <numaif.h>
namespace rocshmem {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wvla-cxx-extension"
// Definition of the namespace-scope wrapper object declared `extern` in
// numa_wrapper.hpp. Its constructor runs during static initialization and
// dlopen()s libnuma, exiting the process on failure.
NUMAWrapper numa;
/**
 * @brief Load libnuma at runtime and populate the function table.
 *
 * Tries the unversioned development name first and then falls back to the
 * versioned runtime soname. Terminates the process with exit(1) when the
 * library cannot be opened or init_function_table() does not report
 * ROCSHMEM_SUCCESS.
 */
NUMAWrapper::NUMAWrapper() {
  // "libnuma.so" is usually a symlink installed only by the development
  // package; hosts with just the runtime package provide "libnuma.so.1".
  numa_handle = dlopen("libnuma.so", RTLD_NOW);
  if (!numa_handle) {
    numa_handle = dlopen("libnuma.so.1", RTLD_NOW);
  }
  if (!numa_handle) {
    DPRINTF("Could not open libnuma. Returning\n");
    exit(1);
  }

  const int err = init_function_table();
  if (err != ROCSHMEM_SUCCESS) {
    DPRINTF("Could not construct libnuma function table \n");
    exit(1);
  }
}
/**
 * @brief Release the libnuma handle obtained in the constructor, if any.
 */
NUMAWrapper::~NUMAWrapper() {
  // Nothing was opened, so there is nothing to close.
  if (numa_handle == nullptr) {
    return;
  }
  dlclose(numa_handle);
}
/**
 * @brief Fill the `numa` function table with entry points from `numa_handle`.
 *
 * Each DLSYM_HELPER line names the table (`numa`), a symbol prefix, the
 * library handle and the table member to fill. The macro's definition is not
 * visible in this file; presumably it dlsym()s `<prefix><member>` and returns
 * an error from this function when the lookup fails -- TODO confirm.
 *
 * @return ROCSHMEM_SUCCESS when every helper line has been passed.
 */
int NUMAWrapper::init_function_table() {
DLSYM_HELPER(numa, numa_, numa_handle, bitmask_isbitset);
DLSYM_HELPER(numa, numa_, numa_handle, get_mems_allowed);
DLSYM_HELPER(numa, numa_, numa_handle, set_preferred);
DLSYM_HELPER(numa, numa_, numa_handle, num_configured_nodes);
DLSYM_HELPER(numa, numa_, numa_handle, num_configured_cpus);
DLSYM_HELPER(numa, numa_, numa_handle, node_of_cpu);
DLSYM_HELPER(numa, numa_, numa_handle, max_node);
// move_pages is the only entry requested with an empty prefix (no `numa_`).
DLSYM_HELPER(numa, , numa_handle, move_pages);
DLSYM_HELPER(numa, numa_, numa_handle, distance);
return ROCSHMEM_SUCCESS;
}
int NUMAWrapper::bitmask_isbitset(const struct bitmask *bmp, unsigned int n) {
return numa.bitmask_isbitset(bmp, n);
}
struct bitmask * NUMAWrapper::get_mems_allowed(void) {
return numa.get_mems_allowed();
}
void NUMAWrapper::set_preferred(int node) {
return numa.set_preferred(node);
}
int NUMAWrapper::num_configured_nodes() {
return numa.num_configured_nodes();
}
int NUMAWrapper::num_configured_cpus(void) {
return numa.num_configured_cpus();
}
int NUMAWrapper::node_of_cpu(int cpu) {
return numa.node_of_cpu(cpu);
}
int NUMAWrapper::max_node(void) {
return numa.max_node();
}
long NUMAWrapper::move_pages(int pid, unsigned long count, void *pages[count],
const int nodes[count], int status[count], int flags) {
return move_pages(pid, count, pages, nodes, status, flags);
}
int NUMAWrapper::distance(int node1, int node2) {
return numa.distance(node1, node2);
}
#pragma clang diagnostic pop
} // namespace rocshmem
+87
Просмотреть файл
@@ -0,0 +1,87 @@
/******************************************************************************
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef LIBRARY_SRC_GDA_NUMA_WRAPPER_HPP_
#define LIBRARY_SRC_GDA_NUMA_WRAPPER_HPP_
namespace rocshmem {
// Forward declaration so the global instance can be declared before the
// class definition below.
class NUMAWrapper;
// Single shared wrapper object; callers use it as `numa.<function>(...)`.
extern NUMAWrapper numa;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wvla-cxx-extension"
/**
 * @brief Runtime-loaded facade over the libnuma entry points used by this
 *        project.
 *
 * The constructor dlopen()s libnuma and fills a table of function pointers;
 * every public member forwards to the matching table entry. The destructor
 * closes the library handle.
 *
 * NOTE(review): `struct bitmask` is introduced here by an elaborated type
 * specifier. If <numa.h> has not been included before this header, it names
 * an incomplete type in the enclosing namespace rather than libnuma's global
 * `bitmask` -- confirm this is intended.
 */
class NUMAWrapper {
 public:
  explicit NUMAWrapper();
  virtual ~NUMAWrapper();

  // The object owns the dlopen() handle and closes it in the destructor, so a
  // copy would close the same handle twice.
  NUMAWrapper(const NUMAWrapper &) = delete;
  NUMAWrapper &operator=(const NUMAWrapper &) = delete;

  int bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
  struct bitmask *get_mems_allowed(void);
  void set_preferred(int node);
  int num_configured_nodes();
  int num_configured_cpus(void);
  int node_of_cpu(int cpu);
  int max_node(void);
  // Array arguments are written as pointers: identical function type to the
  // `T name[count]` form, without needing the VLA compiler extension.
  long move_pages(int pid, unsigned long count, void **pages,
                  const int *nodes, int *status, int flags);
  int distance(int node1, int node2);

 private:
  struct numa_funcs_t {
    int (*bitmask_isbitset)(const struct bitmask *bmp, unsigned int n);
    struct bitmask* (*get_mems_allowed)(void);
    void (*set_preferred)(int node);
    int (*num_configured_nodes)();
    int (*num_configured_cpus)(void);
    int (*node_of_cpu)(int cpu);
    int (*max_node)(void);
    long (*move_pages)(int pid, unsigned long count, void **pages,
                       const int *nodes, int *status, int flags);
    int (*distance)(int node1, int node2);
  };

  /**
   * @brief Table of function pointers resolved from the loaded NUMA library.
   *
   * Inside member functions this name hides the global `numa` wrapper object.
   */
  struct numa_funcs_t numa{};

  /**
   * @brief Handle returned by dlopen() for the NUMA library.
   */
  void *numa_handle = nullptr;

  /**
   * @brief Initialize the function table.
   */
  int init_function_table();
};
#pragma clang diagnostic pop
} // namespace rocshmem
#endif // LIBRARY_SRC_GDA_NUMA_WRAPPER_HPP_
+11 -10
Просмотреть файл
@@ -24,6 +24,7 @@
#include "topology.hpp"
#include "ibv_wrapper.hpp"
#include "numa_wrapper.hpp"
using namespace rocshmem;
@@ -53,7 +54,7 @@ namespace rocshmem
pages[i] = (char*)pages[i-1] + pageSize;
}
long const retCode = move_pages(0, numPages, pages.data(), NULL, status.data(), 0);
long const retCode = numa.move_pages(0, numPages, pages.data(), NULL, status.data(), 0);
if (retCode) {
fprintf(stderr,"Unable to collect page table information for allocated memory. "
"Ensure NUMA library is installed properly");
@@ -90,7 +91,7 @@ namespace rocshmem
if (IsCpuMemType(memType)) {
// Set numa policy prior to call to hipHostMalloc
numa_set_preferred(memDevice.memIndex);
numa.set_preferred(memDevice.memIndex);
// Allocate host-pinned memory (should respect NUMA mem policy)
CHECK_HIP(hipHostMalloc((void **)memPtr, numBytes, hipHostMallocNumaUser | hipHostMallocNonCoherent));
@@ -99,7 +100,7 @@ namespace rocshmem
memset(*memPtr, 0, numBytes);
ERR_CHECK(CheckPages((char*)*memPtr, numBytes, memDevice.memIndex));
// Reset to default numa mem policy
numa_set_preferred(-1);
numa.set_preferred(-1);
} else if (IsGpuMemType(memType)) {
int prev_dev;
CHECK_HIP(hipGetDevice(&prev_dev));
@@ -653,7 +654,7 @@ namespace rocshmem
{
switch (exeType) {
case rocshmem::EXE_CPU:
return numa_num_configured_nodes();
return numa.num_configured_nodes();
case rocshmem::EXE_GPU:
{
int numDetectedGpus = 0;
@@ -790,8 +791,8 @@ namespace rocshmem
// Build CPU remapping on first use
// Skip numa nodes that are not configured
if (remappingCpu.empty()) {
for (int node = 0; node <= numa_max_node(); node++)
if (numa_bitmask_isbitset(numa_get_mems_allowed(), node))
for (int node = 0; node <= numa.max_node(); node++)
if (numa.bitmask_isbitset(numa.get_mems_allowed(), node))
remappingCpu.push_back(node);
}
return remappingCpu[origIdx];
@@ -842,7 +843,7 @@ namespace rocshmem
} else {
printf("\nDetected Topology:\n");
printf("==================\n");
printf(" %d configured CPU NUMA node(s) [%d total]\n", numCpus, numa_max_node() + 1);
printf(" %d configured CPU NUMA node(s) [%d total]\n", numCpus, numa.max_node() + 1);
printf(" %d GPU device(s)\n", numGpus);
printf(" %d Supported NIC device(s)\n", numNics);
}
@@ -865,13 +866,13 @@ namespace rocshmem
printf("NUMA %02d (%02d)%c", i, nodeI, sep);
for (int j = 0; j < numCpus; j++) {
int nodeJ = RemappedCpuIndex(j);
int numaDist = numa_distance(nodeI, nodeJ);
int numaDist = numa.distance(nodeI, nodeJ);
printf(" %5d %c", numaDist, sep);
}
int numCpuCores = 0;
for (int j = 0; j < numa_num_configured_cpus(); j++)
if (numa_node_of_cpu(j) == nodeI) numCpuCores++;
for (int j = 0; j < numa.num_configured_cpus(); j++)
if (numa.node_of_cpu(j) == nodeI) numCpuCores++;
printf(" %5d %c", numCpuCores, sep);
for (int j = 0; j < numGpus; j++) {
+1 -2
Просмотреть файл
@@ -27,8 +27,7 @@
#include <cstring>
#include <future>
#include <map>
#include <numa.h> // If not found, try installing libnuma-dev (e.g apt-get install libnuma-dev)
#include <numaif.h>
#include "numa_wrapper.hpp"
#include <random>
#include <set>
#include <sstream>