From fb6fe518e83d1c002ee9cc8cdd2bce236011296d Mon Sep 17 00:00:00 2001 From: habajpai-amd Date: Fri, 5 Sep 2025 01:38:16 +0530 Subject: [PATCH] fix(transpose): correct host allocation and GB/s calculation (#860) --- .../examples/transpose/transpose.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/projects/rocprofiler-systems/examples/transpose/transpose.cpp b/projects/rocprofiler-systems/examples/transpose/transpose.cpp index 4891341fe4..31f8c21067 100644 --- a/projects/rocprofiler-systems/examples/transpose/transpose.cpp +++ b/projects/rocprofiler-systems/examples/transpose/transpose.cpp @@ -116,10 +116,12 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) std::default_random_engine _engine{ std::random_device{}() * (rank + 1) * (tid + 1) }; std::uniform_int_distribution<int> _dist{ 0, 1000 }; - size_t size = sizeof(int) * M * N; - int* inp_matrix = new int[size]; - int* out_matrix = new int[size]; - for(size_t i = 0; i < M * N; i++) + const size_t elems = static_cast<size_t>(M) * static_cast<size_t>(N); + const size_t size = elems * sizeof(int); + int* inp_matrix = new int[elems]; + int* out_matrix = new int[elems]; + + for(size_t i = 0; i < elems; i++) { inp_matrix[i] = _dist(_engine); out_matrix[i] = 0; @@ -149,7 +151,7 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv) HIP_API_CALL(hipMemcpyAsync(out_matrix, out, size, hipMemcpyDeviceToHost, stream)); double time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count(); - float GB = (float) size * nitr * 2 / (1 << 30); + float GB = static_cast<float>(size) * nitr * 2 / (1 << 30); print_lock.lock(); std::cout << "[" << rank << "][" << tid << "] Runtime of transpose is " << time