kfdtest: avoid BigBufStressTest run on NUMA node 0

Because the dma32 zone is on node 0, using all system memory on node 0 will
cause TTM eviction to free the dma32 zone for other devices which only
work with 32-bit physical addresses. The TTM eviction and restore may take
too long and cause a queue timeout.

When running on other NUMA nodes, the NUMA default memory policy is
MPOL_PREFERRED, which means TTM will get pages from the local node first, and
then get the remaining pages from other nodes. Checking /proc/buddyinfo can
confirm this.

Reset NUMA bind to all after the test.



Change-Id: I39b373c07a2d5aa396f5c7602bffabab0481930f
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
이 커밋은 다음에 포함됨:
Philip Yang
2019-06-13 15:22:52 -04:00
부모 3f2d2e67c9
커밋 36776e9917
3개의 변경된 파일31개의 추가작업 그리고 1개의 파일을 삭제
+1 -1
파일 보기
@@ -104,7 +104,7 @@ link_directories(${SP3_DIR})
add_executable(kfdtest ${SRC_FILES})
target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LIBRARIES} ${DRM_AMDGPU_LIBRARIES} pthread m stdc++ rt amdsp3)
target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LIBRARIES} ${DRM_AMDGPU_LIBRARIES} pthread m stdc++ rt amdsp3 numa)
configure_file ( scripts/kfdtest.exclude kfdtest.exclude COPYONLY )
configure_file ( scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY )
+29
파일 보기
@@ -30,6 +30,7 @@
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <numa.h>
#include <vector>
#include "Dispatch.hpp"
#include "PM4Queue.hpp"
@@ -809,6 +810,24 @@ void KFDMemoryTest::BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB,
<< vramSizeMB * 15 / 16 << "MB" << std::endl;
}
/* Bind the calling task to the NUMA nodes described by nodeStr.
 *
 * nodeStr is handed to numa_parse_nodestring() unchanged. Nothing is done when
 * numa_available() returns -1, when numa_num_task_nodes() reports at most one node,
 * or when the node string cannot be parsed into a node mask.
 */
void KFDMemoryTest::NumaNodeBind(const char *nodeStr) {
    if (numa_available() == -1)
        return;

    const int taskNodeCount = numa_num_task_nodes();
    if (taskNodeCount <= 1)
        return;

    LOG() << "NUMA total nodes " << taskNodeCount << ", bind to " << nodeStr << std::endl;

    struct bitmask *parsedMask = numa_parse_nodestring(nodeStr);
    if (parsedMask == NULL)
        return;

    numa_bind(parsedMask);
    /* The mask is only needed for the numa_bind() call, so release it right away */
    numa_free_nodemask(parsedMask);
}
/* BigBufferStressTest allocs, maps/unmaps, and frees the biggest possible system
* buffers. Its size is found using binary search in the range (0, RAM SIZE) with
* a granularity of 128M. Repeat the similar logic on local buffers (VRAM).
@@ -839,6 +858,13 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
/* Don't run on node 0 of a multi-NUMA-node machine because the dma32 zone is on node 0.
 * Using all memory on node 0, including the dma32 zone, will cause TTM eviction to free
 * the dma32 zone for other devices which only support 32-bit physical addresses. The
 * eviction and restore may retry if busy, causing queue timeout and test failure.
 */
NumaNodeBind("!0");
BigBufferSystemMemory(defaultGPUNode, granularityMB, NULL);
BigBufferVRAM(defaultGPUNode, granularityMB, NULL);
@@ -893,6 +919,9 @@ TEST_F(KFDMemoryTest, BigBufferStressTest) {
}
EXPECT_SUCCESS(queue.Destroy());
/* Reset to run on all task nodes */
NumaNodeBind("all");
TEST_END
}
+1
파일 보기
@@ -44,6 +44,7 @@ class KFDMemoryTest : public KFDBaseComponentTest {
void BigBufferVRAM(int defaultGPUNode, HSAuint64 granularityMB, HSAuint64 *lastSize);
void BigBufferSystemMemory(int defaultGPUNode, HSAuint64 granularityMB, HSAuint64 *lastSize);
void NumaNodeBind(const char *nodeStr);
};
#endif // __KFD_MEMORY_TEST__H__