SWDEV-269879 - Adding function that recommends optimal thread count

Change-Id: I42eb94a058c1b7f9253182e16ff1c3389a836d61
Bu işleme şunda yer alıyor:
cjatin
2021-01-18 15:04:11 +05:30
işlemeyi yapan: Jatin Chaudhary
ebeveyn 9a2e762ccb
işleme d9496c149b
7 değiştirilmiş dosya ile 69 ekleme ve 11 silme
+5 -2
Dosyayı Görüntüle
@@ -573,10 +573,13 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(void) {
void HipMemcpyWithStreamMultiThreadtests::TestwithMultiThreaded(ops op) {
int n = min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS);
size_t thread_count = getHostThreadCount();
if (thread_count == 0) {
failed("Thread Count is 0");
}
std::vector<joinable_thread> threads;
for (uint32_t i = 0; i < n; i++) {
for (uint32_t i = 0; i < thread_count; i++) {
threads.emplace_back(std::thread{[&] {
switch ( op ) {
case ops::TestwithOnestream:
+8 -3
Dosyayı Görüntüle
@@ -116,13 +116,17 @@ bool testhipMemset2DAsyncWithKernel() {
bool testhipMemset2DAsyncMultiThread() {
validateCount = 0;
std::thread t[NUM_THREADS];
auto thread_count = getHostThreadCount(200, NUM_THREADS);
if (thread_count == 0) {
failed("Thread count is 0");
}
std::thread *t = new std::thread[thread_count];
memAllocate();
printf("info: Queueing up hipMemset2DAsync jobs over multiple threads\n");
for (int i = 0 ; i < ITER ; i++) {
for (int k = 0 ; k < NUM_THREADS ; k++) {
for (int k = 0 ; k < thread_count; k++) {
if (k%2) {
t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, A_h, pitch_A,
width);
@@ -131,7 +135,7 @@ bool testhipMemset2DAsyncMultiThread() {
width);
}
}
for (int j = 0 ; j < NUM_THREADS ; j++) {
for (int j = 0 ; j < thread_count; j++) {
t[j].join();
}
@@ -143,6 +147,7 @@ bool testhipMemset2DAsyncMultiThread() {
}
}
memDeallocate();
delete[] t;
testResult = (validateCount == (ITER * elements)) ? true : false;
return testResult;
}
+6 -1
Dosyayı Görüntüle
@@ -142,7 +142,12 @@ void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
// Test entry point: calls hipInit(0), obtains a buffer from load_file(),
// passes it to run_multi_threads() and finally calls passed().
int main() {
HIPCHECK(hipInit(0));
auto buffer = load_file();
// NOTE(review): two run_multi_threads() calls are visible in this function;
// this looks like a diff view in which this first call is the pre-change
// line - confirm against the real file.
run_multi_threads(min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS), buffer);
// Buffer size in whole MiB (integer division truncates).
auto file_size = buffer.size() / (1024 * 1024);
// Ask for a thread count using (file_size + 10) as the per-thread memory
// figure; maxThreads is left at its default.
auto thread_count = getHostThreadCount(file_size + 10);
// A zero recommendation is reported through failed().
if(thread_count == 0) {
failed("Thread Count is zero");
}
run_multi_threads(thread_count, buffer);
passed();
}
+1 -1
Dosyayı Görüntüle
@@ -131,7 +131,7 @@ void run_multi_threads(uint32_t n, const std::vector<char>& buffer) {
// Test entry point: calls hipInit(0), obtains a buffer from load_file(),
// passes it to run_multi_threads() and finally calls passed().
int main() {
HIPCHECK(hipInit(0));
auto buffer = load_file();
// NOTE(review): two run_multi_threads() calls are visible in this function;
// this looks like a diff view in which this first call is the pre-change
// line - confirm against the real file.
run_multi_threads(min(THREADS * std::thread::hardware_concurrency(), MAX_THREADS), buffer);
// NOTE(review): getThreadCount() is not defined anywhere in this view; the
// helper shown elsewhere is getHostThreadCount() - verify this name resolves.
run_multi_threads(getThreadCount(), buffer);
passed();
}
+9 -4
Dosyayı Görüntüle
@@ -129,8 +129,12 @@ int main(int argc, char* argv[]) {
HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream));
std::thread T[NUM_THREADS];
for (int i = 0; i < NUM_THREADS; i++) {
auto thread_count = getHostThreadCount(200, NUM_THREADS);
if (thread_count == 0) {
failed("Thread count is 0");
}
std::thread *T = new std::thread[thread_count];
for (int i = 0; i < thread_count; i++) {
// Use different callback for every even thread
// The callbacks will be added to same stream from different threads
if ((i%2) == 0)
@@ -140,7 +144,7 @@ int main(int argc, char* argv[]) {
}
// Wait until all the threads finish their execution
for (int i = 0; i < NUM_THREADS; i++) {
for (int i = 0; i < thread_count; i++) {
T[i].join();
}
@@ -155,11 +159,12 @@ int main(int argc, char* argv[]) {
// Cb_count should match total number of callbacks added from both threads
// Data_mismatch will be updated if there is problem in data validation
if (Cb_count.load() != NUM_THREADS) {
if (Cb_count.load() != thread_count) {
failed("All callbacks for stream did not get called!");
} else if (Data_mismatch.load() != 0) {
failed("Mismatch found in the result of the computation!");
}
delete[] T;
passed();
}
+37
Dosyayı Görüntüle
@@ -21,6 +21,13 @@ THE SOFTWARE.
*/
#include "test_common.h"
#include <thread>
#ifdef __linux__
#include <sys/sysinfo.h>
#elif defined(_WIN32)
#include <windows.h>
#endif
// standard global variables that can be set on command line
size_t N = 4 * 1024 * 1024;
char memsetval = 0x42;
@@ -45,6 +52,36 @@ const char* PATH_SEPERATOR_STR = "/";
const char* NULL_DEVICE = "/dev/null";
#endif
// Returns the amount of currently free physical memory on the host, in MB
// (1024 * 1024 bytes). Returns 0 when the amount cannot be determined: the
// OS query failed or the platform is neither Linux nor Windows.
static size_t getMemoryAmount() {
#if defined(__linux__)
  struct sysinfo info;
  // sysinfo() returns non-zero on failure, leaving `info` unusable.
  if (sysinfo(&info) != 0) {
    return 0;
  }
  // freeram is expressed in units of mem_unit bytes, not necessarily bytes.
  const unsigned long long freeBytes =
      static_cast<unsigned long long>(info.freeram) * info.mem_unit;
  return static_cast<size_t>(freeBytes / (1024 * 1024));  // MB
#elif defined(_WIN32)
  MEMORYSTATUSEX statex;
  statex.dwLength = sizeof(statex);
  // GlobalMemoryStatusEx() returns zero on failure.
  if (!GlobalMemoryStatusEx(&statex)) {
    return 0;
  }
  return static_cast<size_t>(statex.ullAvailPhys / (1024 * 1024));  // MB
#else
  // Unsupported platform: report "unknown" instead of falling off the end
  // of a non-void function.
  return 0;
#endif
}
// Recommends how many host threads a test should spawn.
//
// The result is the smaller of the hardware concurrency reported by
// std::thread and the number of threads that fit into free memory
// (getMemoryAmount() / memPerThread), optionally capped by maxThreads.
//
//   memPerThread - memory figure per thread, in the same unit that
//                  getMemoryAmount() reports (MB); 0 yields 0.
//   maxThreads   - upper bound on the result; 0 means "no cap".
//
// Returns 0 when no recommendation can be made: memPerThread is 0, the
// processor count or free memory is reported as 0, or free memory is smaller
// than memPerThread.
size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) {
  if (memPerThread == 0) return 0;

  const size_t memAmount = getMemoryAmount();
  const size_t processor_count = std::thread::hardware_concurrency();
  if (processor_count == 0 || memAmount == 0) return 0;

  // Dividing first (instead of multiplying processor_count * memPerThread)
  // gives the same answer without any risk of the product wrapping around.
  const size_t memory_limit = memAmount / memPerThread;
  size_t thread_count =
      (processor_count < memory_limit) ? processor_count : memory_limit;

  if (maxThreads > 0 && thread_count > maxThreads) {
    thread_count = maxThreads;
  }
  return thread_count;
}
namespace HipTest {
+3
Dosyayı Görüntüle
@@ -158,6 +158,9 @@ extern const char* NULL_DEVICE;
#define TYPENAME(T) "?"
#endif
// Get the recommended host thread count for multi-threaded tests.
//   memPerThread - memory figure per thread, in MB (default 200);
//                  passing 0 yields a result of 0.
//   maxThreads   - upper bound on the returned count; 0 (default) means
//                  the result is not capped.
// Returns 0 when no thread count can be recommended.
size_t getHostThreadCount(const size_t memPerThread = 200 /* MB */, const size_t maxThreads = 0);
namespace HipTest {
// Returns the current system time in microseconds