diff --git a/tests/kfdtest/CMakeLists.txt b/tests/kfdtest/CMakeLists.txt index b79f880a74..523d70356f 100644 --- a/tests/kfdtest/CMakeLists.txt +++ b/tests/kfdtest/CMakeLists.txt @@ -104,7 +104,7 @@ link_directories(${SP3_DIR}) add_executable(kfdtest ${SRC_FILES}) -target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LIBRARIES} ${DRM_AMDGPU_LIBRARIES} pthread m stdc++ rt amdsp3) +target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LIBRARIES} ${DRM_AMDGPU_LIBRARIES} pthread m stdc++ rt amdsp3 numa) configure_file ( scripts/kfdtest.exclude kfdtest.exclude COPYONLY ) configure_file ( scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY ) diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 1328047b73..bc076f73f1 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "Dispatch.hpp" #include "PM4Queue.hpp" @@ -809,6 +810,24 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB, << vramSizeMB * 15 / 16 << "MB" << std::endl; } +void KFDMemoryTest::NumaNodeBind(const char *nodeStr) { /* Applies numa_bind() to the NUMA nodes described by nodeStr, a node string handed to numa_parse_nodestring(). No binding is attempted when numa_available() returns -1, when numa_num_task_nodes() reports at most one node, or when the string yields a null mask. */ + if (numa_available() != -1) { /* skip everything when libnuma reports itself unavailable */ + int num_node = numa_num_task_nodes(); + + if (num_node > 1) { /* binding is only attempted when more than one task node is reported */ + struct bitmask *nodemask; + + LOG() << "NUMA total nodes " << num_node << ", bind to " << nodeStr << std::endl; /* logged before parsing, so it appears even if nodeStr then yields no mask */ + + nodemask = numa_parse_nodestring(nodeStr); + if (nodemask) { /* a null mask is skipped without any message */ + numa_bind(nodemask); + numa_free_nodemask(nodemask); /* the mask is only needed for the numa_bind() call above */ + } + } + } +} + /* BigBufferStressTest allocs, maps/unmaps, and frees the biggest possible system * buffers. Its size is found using binary search in the range (0, RAM SIZE) with * a granularity of 128M. Repeat the similar logic on local buffers (VRAM). 
@@ -839,6 +858,13 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) { int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; + /* Don't run on node 0 of a multi-NUMA-node machine, because the dma32 zone is on node 0. + * Using all memory, including the dma32 zone on node 0, causes TTM eviction to free the + * dma32 zone for other devices that support 32-bit physical addresses. The eviction + * and restore may retry if busy, causing queue timeouts and test failure. + */ + NumaNodeBind("!0"); + BigBufferSystemMemory(defaultGPUNode, granularityMB, NULL); BigBufferVRAM(defaultGPUNode, granularityMB, NULL); @@ -893,6 +919,9 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) { } EXPECT_SUCCESS(queue.Destroy()); + /* Reset to run on all task nodes */ + NumaNodeBind("all"); + TEST_END } diff --git a/tests/kfdtest/src/KFDMemoryTest.hpp b/tests/kfdtest/src/KFDMemoryTest.hpp index 947968999e..8f1f78b9ac 100644 --- a/tests/kfdtest/src/KFDMemoryTest.hpp +++ b/tests/kfdtest/src/KFDMemoryTest.hpp @@ -44,6 +44,7 @@ class KFDMemoryTest : public KFDBaseComponentTest { void BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB, HSAuint64 *lastSize); void BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granularityMB, HSAuint64 *lastSize); + void NumaNodeBind(const char *nodeStr); }; #endif // __KFD_MEMORY_TEST__H__