2
0

fix(transpose): correct host allocation and GB/s calculation (#860)

Este cometimento está contido em:
habajpai-amd
2025-09-05 01:38:16 +05:30
cometido por GitHub
ascendente 782dc9214b
cometimento fb6fe518e8
+7 -5
Ver ficheiro
@@ -116,10 +116,12 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
std::default_random_engine _engine{ std::random_device{}() * (rank + 1) * (tid + 1) };
std::uniform_int_distribution<int> _dist{ 0, 1000 };
-size_t size = sizeof(int) * M * N;
-int* inp_matrix = new int[size];
-int* out_matrix = new int[size];
-for(size_t i = 0; i < M * N; i++)
+const size_t elems = static_cast<size_t>(M) * static_cast<size_t>(N);
+const size_t size = elems * sizeof(int);
+int* inp_matrix = new int[elems];
+int* out_matrix = new int[elems];
+for(size_t i = 0; i < elems; i++)
{
inp_matrix[i] = _dist(_engine);
out_matrix[i] = 0;
@@ -149,7 +151,7 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
HIP_API_CALL(hipMemcpyAsync(out_matrix, out, size, hipMemcpyDeviceToHost, stream));
double time =
std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
-float GB = (float) size * nitr * 2 / (1 << 30);
+float GB = static_cast<float>(size) * nitr * 2 / (1 << 30);
print_lock.lock();
std::cout << "[" << rank << "][" << tid << "] Runtime of transpose is " << time