[HipPerf] Report performance based on wall-clock time only for hipPerfDevMemReadSpeed/hipPerfDevMemWriteSpeed

Change-Id: I1fda2ec76da6fad6852d328e0a3fc39e28af57bb
This commit is contained in:
Aryan Salmanpour
2020-08-13 14:23:58 -04:00
parent 87558f6448
commit f7570dce03
2 changed files with 2 additions and 60 deletions
+1 -30
View File
@@ -99,8 +99,6 @@ int main(int argc, char* argv[]) {
HIPCHECK(hipMemcpy(dSrc, hSrc, nBytes, hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(dDst, hDst, sizeof(uint), hipMemcpyHostToDevice));
cout << "info: warm up launch for 'read_kernel' on the stream " << stream << endl;
hipLaunchKernelGGL(read_kernel, dim3(blocks), dim3(threadsPerBlock), 0, stream, dSrc, N, dDst);
HIPCHECK(hipMemcpy(hDst, dDst, sizeof(uint), hipMemcpyDeviceToHost));
hipDeviceSynchronize();
@@ -111,9 +109,6 @@ int main(int argc, char* argv[]) {
HIPCHECK(hipErrorUnknown);
}
cout << "info: data validated for warm up launch for 'read_kernel'" << endl;
cout << "info: launching 'read_kernel' on the stream " << stream << " for "<< nIter << " iterations"<< endl;
// measure performance based on host time
auto all_start = chrono::steady_clock::now();
@@ -129,31 +124,7 @@ int main(int argc, char* argv[]) {
double perf = ((double)nBytes * nIter * (double)(1e-09)) / all_kernel_time.count();
cout << "info: average read speed of " << perf << " GB/s " << "achieved for memory size of " <<
nBytes / (1024 * 1024) << " MB, calculated based on host time" << endl;
// measure performance based on events time
hipEvent_t start, stop;
HIPCHECK(hipEventCreate(&start));
HIPCHECK(hipEventCreate(&stop));
float allEventMs = 0;
for(int i = 0; i < nIter; i++) {
HIPCHECK(hipEventRecord(start, NULL));
hipLaunchKernelGGL(read_kernel, dim3(blocks), dim3(threadsPerBlock), 0, stream, dSrc, N, dDst);
HIPCHECK(hipEventRecord(stop, NULL));
HIPCHECK(hipEventSynchronize(stop));
float eventMs = 1.0f;
HIPCHECK(hipEventElapsedTime(&eventMs, start, stop));
allEventMs += eventMs;
}
double perfe = ((double)nBytes * nIter * (double)(1e-06)) / allEventMs;
cout << "info: average read speed of " << perfe << " GB/s " << "achieved for memory size of " <<
nBytes / (1024 * 1024) << " MB, calculated based on events time" << endl;
nBytes / (1024 * 1024) << " MB" << endl;
delete [] hSrc;
delete hDst;
@@ -88,9 +88,6 @@ int main(int argc, char* argv[]) {
HIPCHECK(hipStreamCreate(&stream));
cout << "info: warm up launch for 'write_kernel' on the stream " << stream << endl;
hipLaunchKernelGGL(write_kernel, dim3(blocks), dim3(threadsPerBlock), 0, stream, dDst, N, pval);
HIPCHECK(hipMemcpy(hDst, dDst, nBytes , hipMemcpyDeviceToHost));
hipDeviceSynchronize();
@@ -106,9 +103,6 @@ int main(int argc, char* argv[]) {
}
}
cout << "info: data validated for warm up launch for 'write_kernel" << endl;
cout << "info: launching 'write_kernel' on the stream " << stream << " for "<< nIter << " iterations"<< endl;
auto all_start = chrono::steady_clock::now();
for(int i = 0; i < nIter; i++) {
hipLaunchKernelGGL(write_kernel, dim3(blocks), dim3(threadsPerBlock), 0, stream, dDst, N, pval);
@@ -121,31 +115,8 @@ int main(int argc, char* argv[]) {
double perf = ((double)nBytes * nIter * (double)(1e-09)) / all_kernel_time.count();
cout << "info: average write speed of " << perf << " GB/s " << "achieved for memory size of " <<
nBytes / (1024 * 1024) << " MB, calculated based on host time" << endl;
nBytes / (1024 * 1024) << " MB" << endl;
// measure performance based on events time
hipEvent_t start, stop;
HIPCHECK(hipEventCreate(&start));
HIPCHECK(hipEventCreate(&stop));
float allEventMs = 0;
for(int i = 0; i < nIter; i++) {
HIPCHECK(hipEventRecord(start, NULL));
hipLaunchKernelGGL(write_kernel, dim3(blocks), dim3(threadsPerBlock), 0, stream, dDst, N, pval);
HIPCHECK(hipEventRecord(stop, NULL));
HIPCHECK(hipEventSynchronize(stop));
float eventMs = 1.0f;
HIPCHECK(hipEventElapsedTime(&eventMs, start, stop));
allEventMs += eventMs;
}
double perfe = ((double)nBytes * nIter * (double)(1e-06)) / allEventMs;
cout << "info: average write speed of " << perfe << " GB/s " << "achieved for memory size of " <<
nBytes / (1024 * 1024) << " MB, calculated based on events time" << endl;
delete [] hDst;
hipFree(dDst);