SWDEV-269879 - Adding function that recommends optimal thread count
Change-Id: I42eb94a058c1b7f9253182e16ff1c3389a836d61
Bu işleme şunda yer alıyor:
işlemeyi yapan:
Jatin Chaudhary
ebeveyn
9a2e762ccb
işleme
d9496c149b
@@ -573,10 +573,13 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(void) {
|
||||
|
||||
|
||||
void HipMemcpyWithStreamMultiThreadtests::TestwithMultiThreaded(ops op) {
|
||||
int n = min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS);
|
||||
size_t thread_count = getHostThreadCount();
|
||||
if (thread_count == 0) {
|
||||
failed("Thread Count is 0");
|
||||
}
|
||||
std::vector<joinable_thread> threads;
|
||||
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
for (uint32_t i = 0; i < thread_count; i++) {
|
||||
threads.emplace_back(std::thread{[&] {
|
||||
switch ( op ) {
|
||||
case ops::TestwithOnestream:
|
||||
|
||||
@@ -116,13 +116,17 @@ bool testhipMemset2DAsyncWithKernel() {
|
||||
|
||||
bool testhipMemset2DAsyncMultiThread() {
|
||||
validateCount = 0;
|
||||
std::thread t[NUM_THREADS];
|
||||
auto thread_count = getHostThreadCount(200, NUM_THREADS);
|
||||
if (thread_count == 0) {
|
||||
failed("Thread count is 0");
|
||||
}
|
||||
std::thread *t = new std::thread[thread_count];
|
||||
|
||||
memAllocate();
|
||||
|
||||
printf("info: Queueing up hipMemset2DAsync jobs over multiple threads\n");
|
||||
for (int i = 0 ; i < ITER ; i++) {
|
||||
for (int k = 0 ; k < NUM_THREADS ; k++) {
|
||||
for (int k = 0 ; k < thread_count; k++) {
|
||||
if (k%2) {
|
||||
t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, A_h, pitch_A,
|
||||
width);
|
||||
@@ -131,7 +135,7 @@ bool testhipMemset2DAsyncMultiThread() {
|
||||
width);
|
||||
}
|
||||
}
|
||||
for (int j = 0 ; j < NUM_THREADS ; j++) {
|
||||
for (int j = 0 ; j < thread_count; j++) {
|
||||
t[j].join();
|
||||
}
|
||||
|
||||
@@ -143,6 +147,7 @@ bool testhipMemset2DAsyncMultiThread() {
|
||||
}
|
||||
}
|
||||
memDeallocate();
|
||||
delete[] t;
|
||||
testResult = (validateCount == (ITER * elements)) ? true : false;
|
||||
return testResult;
|
||||
}
|
||||
|
||||
@@ -142,7 +142,12 @@ void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
|
||||
int main() {
|
||||
HIPCHECK(hipInit(0));
|
||||
auto buffer = load_file();
|
||||
run_multi_threads(min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS), buffer);
|
||||
auto file_size = buffer.size() / (1024 * 1024);
|
||||
auto thread_count = getHostThreadCount(file_size + 10);
|
||||
if(thread_count == 0) {
|
||||
failed("Thread Count is zero");
|
||||
}
|
||||
run_multi_threads(thread_count, buffer);
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -131,7 +131,7 @@ void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
|
||||
int main() {
|
||||
HIPCHECK(hipInit(0));
|
||||
auto buffer = load_file();
|
||||
run_multi_threads(min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS), buffer);
|
||||
run_multi_threads(getThreadCount(), buffer);
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -129,8 +129,12 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream));
|
||||
|
||||
std::thread T[NUM_THREADS];
|
||||
for (int i = 0; i < NUM_THREADS; i++) {
|
||||
auto thread_count = getHostThreadCount(200, NUM_THREADS);
|
||||
if (thread_count == 0) {
|
||||
failed("Thread count is 0");
|
||||
}
|
||||
std::thread *T = new std::thread[thread_count];
|
||||
for (int i = 0; i < thread_count; i++) {
|
||||
// Use different callback for every even thread
|
||||
// The callbacks will be added to same stream from different threads
|
||||
if ((i%2) == 0)
|
||||
@@ -140,7 +144,7 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
// Wait until all the threads finish their execution
|
||||
for (int i = 0; i < NUM_THREADS; i++) {
|
||||
for (int i = 0; i < thread_count; i++) {
|
||||
T[i].join();
|
||||
}
|
||||
|
||||
@@ -155,11 +159,12 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
// Cb_count should match total number of callbacks added from both threads
|
||||
// Data_mismatch will be updated if there is problem in data validation
|
||||
if (Cb_count.load() != NUM_THREADS) {
|
||||
if (Cb_count.load() != thread_count) {
|
||||
failed("All callbacks for stream did not get called!");
|
||||
} else if (Data_mismatch.load() != 0) {
|
||||
failed("Mismatch found in the result of the computation!");
|
||||
}
|
||||
delete[] T;
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -21,6 +21,13 @@ THE SOFTWARE.
|
||||
*/
|
||||
#include "test_common.h"
|
||||
|
||||
#include <thread>
|
||||
#ifdef __linux__
|
||||
#include <sys/sysinfo.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// standard global variables that can be set on command line
|
||||
size_t N = 4 * 1024 * 1024;
|
||||
char memsetval = 0x42;
|
||||
@@ -45,6 +52,36 @@ const char* PATH_SEPERATOR_STR = "/";
|
||||
const char* NULL_DEVICE = "/dev/null";
|
||||
#endif
|
||||
|
||||
// Returns the amount of currently free physical memory on the host, in MB.
// Returns 0 when the amount cannot be queried (the system call failed or the
// platform is not supported); getHostThreadCount() reports that as 0 threads.
static size_t getMemoryAmount() {
#if defined(__linux__)
  struct sysinfo info;
  if (sysinfo(&info) != 0) return 0;
  // sysinfo reports freeram in multiples of mem_unit bytes, so scale to bytes
  // before converting to MB.
  const unsigned long long freeBytes =
      static_cast<unsigned long long>(info.freeram) * info.mem_unit;
  return static_cast<size_t>(freeBytes / (1024ULL * 1024ULL));  // MB
#elif defined(_WIN32)
  MEMORYSTATUSEX statex;
  statex.dwLength = sizeof(statex);
  if (!GlobalMemoryStatusEx(&statex)) return 0;
  return static_cast<size_t>(statex.ullAvailPhys / (1024ULL * 1024ULL));  // MB
#else
  // No free-memory query implemented for this platform.
  return 0;
#endif
}

// Recommends how many host threads a test should spawn.
//
// memPerThread: memory each thread is expected to need, in MB.
// maxThreads:   upper bound on the result; 0 means "no upper bound".
//
// Returns the smaller of the hardware concurrency and the number of threads
// whose combined memPerThread fits in the currently free memory, capped at
// maxThreads when maxThreads is non-zero. Returns 0 when memPerThread is 0,
// when the processor count or free memory cannot be determined, or when not
// even one thread fits in free memory.
size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) {
  if (memPerThread == 0) return 0;

  const size_t memAmount = getMemoryAmount();
  const size_t processorCount = std::thread::hardware_concurrency();
  if (processorCount == 0 || memAmount == 0) return 0;

  // Compare by division rather than by processorCount * memPerThread: the
  // product can wrap around for large memPerThread and would then wrongly
  // report that every processor can be used.
  const size_t memBound = memAmount / memPerThread;
  size_t threadCount = (processorCount < memBound) ? processorCount : memBound;

  if (maxThreads > 0 && threadCount > maxThreads) {
    threadCount = maxThreads;
  }
  return threadCount;
}
|
||||
|
||||
namespace HipTest {
|
||||
|
||||
|
||||
|
||||
@@ -158,6 +158,9 @@ extern const char* NULL_DEVICE;
|
||||
#define TYPENAME(T) "?"
|
||||
#endif
|
||||
|
||||
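// Returns a recommended number of host threads for a test: the hardware
// concurrency, reduced so that memPerThread (in MB) per thread fits in the
// currently free system memory, and capped at maxThreads when maxThreads is
// non-zero. Returns 0 if no count can be determined (memPerThread is 0, the
// processor count is unknown, or no free memory is reported).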
|
||||
size_t getHostThreadCount(const size_t memPerThread = 200 /* MB */, const size_t maxThreads = 0);
|
||||
|
||||
namespace HipTest {
|
||||
|
||||
// Returns the current system time in microseconds
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle