From d9496c149bbe62804322ba8adfa4d3953ca33cea Mon Sep 17 00:00:00 2001 From: cjatin Date: Mon, 18 Jan 2021 15:04:11 +0530 Subject: [PATCH] SWDEV-269879 - Adding function that recommends optimal thread count Change-Id: I42eb94a058c1b7f9253182e16ff1c3389a836d61 --- .../memory/hipMemcpyWithStreamMultiThread.cpp | 7 +++- .../hipMemset2DAsyncMultiThreadAndKernel.cpp | 11 ++++-- .../hipModuleLoadDataMultThreadOnMultGPU.cpp | 7 +++- .../module/hipModuleLoadDataMultThreaded.cpp | 8 +++- .../stream/hipStreamACb_MultiThread.cpp | 13 +++++-- hipamd/tests/src/test_common.cpp | 37 +++++++++++++++++++ hipamd/tests/src/test_common.h | 3 ++ 7 files changed, 75 insertions(+), 11 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp index 0e95ca9c04..eecc7dd467 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyWithStreamMultiThread.cpp @@ -573,10 +573,13 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(void) { void HipMemcpyWithStreamMultiThreadtests::TestwithMultiThreaded(ops op) { - int n = min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS); + size_t thread_count = getHostThreadCount(); + if (thread_count == 0) { + failed("Thread Count is 0"); + } std::vector threads; - for (uint32_t i = 0; i < n; i++) { + for (uint32_t i = 0; i < thread_count; i++) { threads.emplace_back(std::thread{[&] { switch ( op ) { case ops::TestwithOnestream: diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp index a9097e0bfe..1e73d75704 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemset2DAsyncMultiThreadAndKernel.cpp @@ -116,13 +116,17 @@ bool testhipMemset2DAsyncWithKernel() { bool
testhipMemset2DAsyncMultiThread() { validateCount = 0; - std::thread t[NUM_THREADS]; + auto thread_count = getHostThreadCount(200, NUM_THREADS); + if (thread_count == 0) { + failed("Thread count is 0"); + } + std::thread *t = new std::thread[thread_count]; memAllocate(); printf("info: Queueing up hipMemset2DAsync jobs over multiple threads\n"); for (int i = 0 ; i < ITER ; i++) { - for (int k = 0 ; k < NUM_THREADS ; k++) { + for (size_t k = 0 ; k < thread_count; k++) { if (k%2) { t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, A_h, pitch_A, width); @@ -131,7 +135,7 @@ bool testhipMemset2DAsyncMultiThread() { width); } } - for (int j = 0 ; j < NUM_THREADS ; j++) { + for (size_t j = 0 ; j < thread_count; j++) { t[j].join(); } @@ -143,6 +147,7 @@ bool testhipMemset2DAsyncMultiThread() { } } memDeallocate(); + delete[] t; testResult = (validateCount == (ITER * elements)) ? true : false; return testResult; } diff --git a/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp b/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp index 0288137479..b5341b710b 100644 --- a/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp +++ b/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreadOnMultGPU.cpp @@ -142,7 +142,12 @@ void run_multi_threads(uint32_t n, const std::vector& buffer) { int main() { HIPCHECK(hipInit(0)); auto buffer = load_file(); - run_multi_threads(min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS), buffer); + auto file_size = buffer.size() / (1024 * 1024); + auto thread_count = getHostThreadCount(file_size + 10); + if (thread_count == 0) { + failed("Thread Count is zero"); + } + run_multi_threads(thread_count, buffer); passed(); } diff --git a/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index c91acea68f..51ea72fbe5 100644 ---
a/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/hipamd/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -131,7 +131,13 @@ void run_multi_threads(uint32_t n, const std::vector& buffer) { int main() { HIPCHECK(hipInit(0)); auto buffer = load_file(); - run_multi_threads(min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS), buffer); + // Size the per-thread memory budget from the module size in MB, plus 10. + auto file_size = buffer.size() / (1024 * 1024); + auto thread_count = getHostThreadCount(file_size + 10); + if (thread_count == 0) { + failed("Thread Count is zero"); + } + run_multi_threads(thread_count, buffer); passed(); } diff --git a/hipamd/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp b/hipamd/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp index 687d79fb10..ce405fbb65 100644 --- a/hipamd/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp +++ b/hipamd/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp @@ -129,8 +129,12 @@ int main(int argc, char* argv[]) { HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); - std::thread T[NUM_THREADS]; - for (int i = 0; i < NUM_THREADS; i++) { + auto thread_count = getHostThreadCount(200, NUM_THREADS); + if (thread_count == 0) { + failed("Thread count is 0"); + } + std::thread *T = new std::thread[thread_count]; + for (size_t i = 0; i < thread_count; i++) { // Use different callback for every even thread // The callbacks will be added to same stream from different threads if ((i%2) == 0) @@ -140,7 +144,7 @@ int main(int argc, char* argv[]) { } // Wait until all the threads finish their execution - for (int i = 0; i < NUM_THREADS; i++) { + for (size_t i = 0; i < thread_count; i++) { T[i].join(); } @@ -155,11 +159,12 @@ int main(int argc, char* argv[]) { // Cb_count should match total number of callbacks added from both threads // Data_mismatch will be updated if there is problem in data validation - if (Cb_count.load() != NUM_THREADS) { + if (Cb_count.load() != thread_count) { failed("All callbacks for stream did not get called!"); } else if (Data_mismatch.load() != 0) { failed("Mismatch found in the result of the computation!"); } + delete[] T; passed(); }
diff --git a/hipamd/tests/src/test_common.cpp b/hipamd/tests/src/test_common.cpp index 1c0dcc8c34..f0a0883115 100644 --- a/hipamd/tests/src/test_common.cpp +++ b/hipamd/tests/src/test_common.cpp @@ -21,6 +21,13 @@ THE SOFTWARE. */ #include "test_common.h" +#include <thread> +#ifdef __linux__ +#include <sys/sysinfo.h> +#elif defined(_WIN32) +#include <windows.h> +#endif + // standard global variables that can be set on command line size_t N = 4 * 1024 * 1024; char memsetval = 0x42; @@ -45,6 +52,36 @@ const char* PATH_SEPERATOR_STR = "/"; const char* NULL_DEVICE = "/dev/null"; #endif +// Free physical host memory in MB; 0 when it cannot be determined. +static size_t getMemoryAmount() { +#if defined(__linux__) + struct sysinfo info; + if (sysinfo(&info) != 0) return 0; + // freeram is counted in units of mem_unit bytes. + return static_cast<unsigned long long>(info.freeram) * info.mem_unit / (1024 * 1024); +#elif defined(_WIN32) + MEMORYSTATUSEX statex; + statex.dwLength = sizeof(statex); + if (!GlobalMemoryStatusEx(&statex)) return 0; + return (statex.ullAvailPhys / (1024 * 1024)); // MB +#else + return 0; // unsupported platform +#endif +} + +// Recommends min(cores, free MB / memPerThread), capped by maxThreads if > 0; 0 means none. +size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) { + if (memPerThread == 0) return 0; + const size_t memAmount = getMemoryAmount(); + const size_t processor_count = std::thread::hardware_concurrency(); + if (processor_count == 0 || memAmount == 0) return 0; + // Dividing instead of multiplying keeps the comparison overflow-free. + const size_t by_memory = memAmount / memPerThread; + size_t thread_count = (processor_count < by_memory) ? processor_count : by_memory; + if (maxThreads > 0 && thread_count > maxThreads) thread_count = maxThreads; + return thread_count; +} + namespace HipTest { diff --git a/hipamd/tests/src/test_common.h b/hipamd/tests/src/test_common.h index 6ef1aaff6c..593e7bcd5d 100755 --- a/hipamd/tests/src/test_common.h +++ b/hipamd/tests/src/test_common.h @@ -158,6 +158,9 @@ extern const char* NULL_DEVICE; #define TYPENAME(T) "?" #endif +// Optimal host thread count (memPerThread in MB, maxThreads 0 = no cap); 0 on failure. +size_t getHostThreadCount(const size_t memPerThread = 200 /* MB */, const size_t maxThreads = 0); + namespace HipTest { // Returns the current system time in microseconds