From 36776e99176cf71356b6ea86f44781dab96834af Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 13 Jun 2019 15:22:52 -0400 Subject: [PATCH] kfdtest: avoid BigBufStressTest run on NUMA node 0 Because the dma32 zone is on node 0, using all system memory on node 0 will cause TTM eviction to free the dma32 zone for other devices which only work with 32-bit physical addresses. The TTM eviction and restore may take too long and cause a queue timeout. When running on other NUMA nodes, the NUMA default memory policy is MPOL_PREFERRED, which means TTM will get pages from the local node first, and then get the remaining pages from other nodes. Checking /proc/buddyinfo can confirm this. Reset the NUMA bind to all nodes after the test. Change-Id: I39b373c07a2d5aa396f5c7602bffabab0481930f Signed-off-by: Philip Yang --- tests/kfdtest/CMakeLists.txt | 2 +- tests/kfdtest/src/KFDMemoryTest.cpp | 29 +++++++++++++++++++++++++++++ tests/kfdtest/src/KFDMemoryTest.hpp | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tests/kfdtest/CMakeLists.txt b/tests/kfdtest/CMakeLists.txt index b79f880a74..523d70356f 100644 --- a/tests/kfdtest/CMakeLists.txt +++ b/tests/kfdtest/CMakeLists.txt @@ -104,7 +104,7 @@ link_directories(${SP3_DIR}) add_executable(kfdtest ${SRC_FILES}) -target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LIBRARIES} ${DRM_AMDGPU_LIBRARIES} pthread m stdc++ rt amdsp3) +target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LIBRARIES} ${DRM_AMDGPU_LIBRARIES} pthread m stdc++ rt amdsp3 numa) configure_file ( scripts/kfdtest.exclude kfdtest.exclude COPYONLY ) configure_file ( scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY ) diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 1328047b73..bc076f73f1 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "Dispatch.hpp" #include "PM4Queue.hpp" @@ -809,6 +810,24 @@ void KFDMemoryTest::BigBufferVRAM(int 
defaultGPUNode, HSAuint64 granularityMB, << vramSizeMB * 15 / 16 << "MB" << std::endl; } +void KFDMemoryTest::NumaNodeBind(const char *nodeStr) { /* Bind the calling task to the NUMA nodes described by nodeStr, a libnuma node string (callers in this patch pass "!0" and "all"). Does nothing when numa_available() returns -1, when numa_num_task_nodes() reports at most one node, or when numa_parse_nodestring() returns a null mask. */ + if (numa_available() != -1) { + int num_node = numa_num_task_nodes(); + + if (num_node > 1) { /* binding is only attempted when more than one node is reported */ + struct bitmask *nodemask; + + LOG() << "NUMA total nodes " << num_node << ", bind to " << nodeStr << std::endl; + + nodemask = numa_parse_nodestring(nodeStr); + if (nodemask) { /* a null mask is skipped silently; no error is reported */ + numa_bind(nodemask); + numa_free_nodemask(nodemask); /* release the mask returned by numa_parse_nodestring() */ + } + } + } +} + /* BigBufferStressTest allocs, maps/unmaps, and frees the biggest possible system * buffers. Its size is found using binary search in the range (0, RAM SIZE) with * a granularity of 128M. Repeat the similar logic on local buffers (VRAM). @@ -839,6 +858,13 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) { int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; + /* Don't run on node 0 on multiple NUMA node machine because dma32 zone is on node 0, + * Use all memory including dma32 zone on node 0 will cause TTM eviction to free dma32 + * zone for other devices which supports 32bit physical address. The eviction and + * restore may retry if busy and cause queue timeout and test failure. 
+ */ + NumaNodeBind("!0"); + BigBufferSystemMemory(defaultGPUNode, granularityMB, NULL); BigBufferVRAM(defaultGPUNode, granularityMB, NULL); @@ -893,6 +919,9 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) { } EXPECT_SUCCESS(queue.Destroy()); + /* Reset to run on all task nodes */ + NumaNodeBind("all"); + TEST_END } diff --git a/tests/kfdtest/src/KFDMemoryTest.hpp b/tests/kfdtest/src/KFDMemoryTest.hpp index 947968999e..8f1f78b9ac 100644 --- a/tests/kfdtest/src/KFDMemoryTest.hpp +++ b/tests/kfdtest/src/KFDMemoryTest.hpp @@ -44,6 +44,7 @@ class KFDMemoryTest : public KFDBaseComponentTest { void BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB, HSAuint64 *lastSize); void BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granularityMB, HSAuint64 *lastSize); + void NumaNodeBind(const char *nodeStr); }; #endif // __KFD_MEMORY_TEST__H__