From b3c5a6f048103cec367b80918b8fb72363322a16 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 27 Apr 2022 16:58:45 -0500 Subject: [PATCH] perfetto mpi + mpi example (#49) [ROCm/rocprofiler-systems commit: 6b7b6e46cf56be46a99e4f2940212e08c5523636] --- .../examples/CMakeLists.txt | 1 + .../examples/mpi/CMakeLists.txt | 22 +++ .../rocprofiler-systems/examples/mpi/mpi.cpp | 125 ++++++++++++++++++ .../source/lib/omnitrace/src/library.cpp | 83 +++++++----- .../rocprofiler-systems/tests/CMakeLists.txt | 32 +++++ 5 files changed, 233 insertions(+), 30 deletions(-) create mode 100644 projects/rocprofiler-systems/examples/mpi/CMakeLists.txt create mode 100644 projects/rocprofiler-systems/examples/mpi/mpi.cpp diff --git a/projects/rocprofiler-systems/examples/CMakeLists.txt b/projects/rocprofiler-systems/examples/CMakeLists.txt index 74f8b68b43..2d806bcbb3 100644 --- a/projects/rocprofiler-systems/examples/CMakeLists.txt +++ b/projects/rocprofiler-systems/examples/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(parallel-overhead) add_subdirectory(code-coverage) add_subdirectory(user-api) add_subdirectory(openmp) +add_subdirectory(mpi) if(CMAKE_VERSION VERSION_LESS 3.16 AND (NOT DEFINED LULESH_BUILD_KOKKOS OR LULESH_BUILD_KOKKOS)) diff --git a/projects/rocprofiler-systems/examples/mpi/CMakeLists.txt b/projects/rocprofiler-systems/examples/mpi/CMakeLists.txt new file mode 100644 index 0000000000..ea676f0d41 --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) + +project(omnitrace-mpi-example LANGUAGES CXX) + +find_package(MPI) +if(NOT MPI_FOUND) + message(AUTHOR_WARNING "MPI could not be found. 
Cannot build omnitrace-mpi target") + return() +endif() + +add_executable(mpi-example mpi.cpp) + +if(TARGET omnitrace::omnitrace-compile-options) + target_link_libraries(mpi-example PRIVATE omnitrace::omnitrace-compile-options) +endif() + +target_link_libraries(mpi-example PRIVATE MPI::MPI_CXX) + +if(NOT CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) + set_target_properties(mpi-example PROPERTIES RUNTIME_OUTPUT_DIRECTORY + ${CMAKE_BINARY_DIR}) +endif() diff --git a/projects/rocprofiler-systems/examples/mpi/mpi.cpp b/projects/rocprofiler-systems/examples/mpi/mpi.cpp new file mode 100644 index 0000000000..073f07f43b --- /dev/null +++ b/projects/rocprofiler-systems/examples/mpi/mpi.cpp @@ -0,0 +1,125 @@ +/* +Copyright (c) 2015-2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static std::mutex print_lock{};
+using auto_lock_t = std::unique_lock;
+
+#include
+
+std::string _name = {};  // executable basename, set in main() and used as the log prefix
+
+template
+void
+all2all(int _rank)  // fills N pseudo-random values seeded from _rank, exchanges them via MPI_Alltoall; rank 0 prints both arrays
+{
+    static_assert(N > 0, "Error! N must be greater than zero!");
+
+    auto _mt   = std::mt19937_64{ size_t(_rank + 100) };  // seed differs per rank
+    auto _dist = []() {
+        if constexpr(std::is_integral::value)
+        {
+            return std::uniform_int_distribution(1, N * N);
+        }
+        else
+        {
+            return std::uniform_real_distribution(1.0, N * N);
+        }
+    }();
+
+    auto _get_values_str = [](const auto& _data) {
+        std::stringstream _ss{};
+        for(auto&& itr : _data)
+            _ss << ", " << std::setw(6) << std::setprecision(2) << std::fixed << itr;
+        return _ss.str().substr(1);  // drop the leading ','
+    };
+
+    std::array values_sent = {};
+    std::array values_recv = {};
+    for(size_t i = 0; i < N; ++i)
+        values_sent[i] = _dist(_mt);
+
+    if(_rank == 0)
+        printf("[%s][%i] values sent (# = %zu) :: %s.\n", _name.c_str(), _rank,
+               values_sent.size(), _get_values_str(values_sent).c_str());
+
+    auto _dtype = MPI_INT;  // default datatype; replaced below by MPI_LONG / MPI_FLOAT / MPI_DOUBLE
+    if(std::is_same::value)
+        _dtype = MPI_LONG;
+    else if(std::is_same::value)
+        _dtype = MPI_FLOAT;
+    else if(std::is_same::value)
+        _dtype = MPI_DOUBLE;
+
+    MPI_Alltoall(&values_sent[_rank], 1, _dtype, &values_recv[_rank], 1, _dtype,
+                 MPI_COMM_WORLD);  // NOTE(review): one element per rank starting at index _rank; needs _rank + comm size <= N -- confirm
+
+    if(_rank == 0)
+        printf("[%s][%i] values recv (# = %zu) :: %s.\n", _name.c_str(), _rank,
+               values_sent.size(), _get_values_str(values_recv).c_str());
+}
+
+int
+main(int argc, char** argv)  // usage: <exe> [iterations]; runs four all2all exchanges per iteration
+{
+    int rank = 0;
+    int size = 1;
+    int nitr = 1;
+    if(argc > 1) nitr = atoi(argv[1]);  // was argv[2]: with one argument that is the null terminator of argv
+
+    _name     = argv[0];
+    auto _pos = _name.find_last_of('/');
+    if(_pos < _name.length()) _name = _name.substr(_pos + 1);  // npos fails this test, so a name without '/' is kept whole
+
+    printf("[%s] Number of iterations: %i\n", _name.c_str(), nitr);
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    for(int i = 0; i < nitr; ++i)
+    {
+        all2all(rank);
+        all2all(rank);
+        MPI_Barrier(MPI_COMM_WORLD);
+        all2all(rank);
+        all2all(rank);
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+
+    return 0;
+}
diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/src/library.cpp b/projects/rocprofiler-systems/source/lib/omnitrace/src/library.cpp index 01e40c8fe4..24dedaf322 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/src/library.cpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/src/library.cpp @@ -1190,41 +1190,64 @@ omnitrace_finalize_hidden(void) OMNITRACE_VERBOSE_F(3, "Stopping the blocking perfetto trace sessions...\n"); tracing_session->StopBlocking(); + using char_vec_t = std::vector; OMNITRACE_VERBOSE_F(3, "Getting the trace data...\n"); - std::vector trace_data{ tracing_session->ReadTraceBlocking() }; - if(trace_data.empty()) - { - fprintf(stderr, - "[%s]> trace data is empty. File '%s' will not be written...\n", - OMNITRACE_FUNCTION, get_perfetto_output_filename().c_str()); - return; - } - // Write the trace into a file. - if(get_verbose() >= 0) - fprintf(stderr, - "[%s][%s]|%i> Outputting '%s' (%.2f KB / %.2f MB / %.2f GB)... 
", - TIMEMORY_PROJECT_NAME, OMNITRACE_FUNCTION, dmp::rank(), - get_perfetto_output_filename().c_str(), - static_cast(trace_data.size()) / units::KB, - static_cast(trace_data.size()) / units::MB, - static_cast(trace_data.size()) / units::GB); - std::ofstream ofs{}; - if(!tim::filepath::open(ofs, get_perfetto_output_filename(), - std::ios::out | std::ios::binary)) - { - fprintf(stderr, "\n[%s]> Error opening '%s'...\n", OMNITRACE_FUNCTION, - get_perfetto_output_filename().c_str()); - _perfetto_output_error = true; - } - else +#if defined(TIMEMORY_USE_MPI) && TIMEMORY_USE_MPI > 0 + using perfetto_mpi_get_t = tim::operation::finalize::mpi_get; + + char_vec_t _trace_data{ tracing_session->ReadTraceBlocking() }; + std::vector _rank_data = {}; + auto _combine = [](char_vec_t& _dst, const char_vec_t& _src) -> char_vec_t& { + _dst.reserve(_dst.size() + _src.size()); + for(auto&& itr : _src) + _dst.emplace_back(itr); + return _dst; + }; + + perfetto_mpi_get_t{ _rank_data, _trace_data, _combine }; + auto trace_data = char_vec_t{}; + for(auto& itr : _rank_data) + trace_data = + (trace_data.empty()) ? std::move(itr) : _combine(trace_data, itr); +#else + char_vec_t trace_data{ tracing_session->ReadTraceBlocking() }; +#endif + + if(!trace_data.empty()) { // Write the trace into a file. - if(get_verbose() >= 0) fprintf(stderr, "Done\n"); - ofs.write(&trace_data[0], trace_data.size()); + if(get_verbose() >= 0) + fprintf(stderr, + "[%s][%s]|%i> Outputting '%s' (%.2f KB / %.2f MB / %.2f GB)... 
", + TIMEMORY_PROJECT_NAME, OMNITRACE_FUNCTION, dmp::rank(), + get_perfetto_output_filename().c_str(), + static_cast(trace_data.size()) / units::KB, + static_cast(trace_data.size()) / units::MB, + static_cast(trace_data.size()) / units::GB); + std::ofstream ofs{}; + if(!tim::filepath::open(ofs, get_perfetto_output_filename(), + std::ios::out | std::ios::binary)) + { + OMNITRACE_VERBOSE_F(0, "Error opening '%s'...\n", + get_perfetto_output_filename().c_str()); + _perfetto_output_error = true; + } + else + { + // Write the trace into a file. + if(get_verbose() >= 0) fprintf(stderr, "Done\n"); + ofs.write(&trace_data[0], trace_data.size()); + } + ofs.close(); + if(get_verbose() >= 0) fprintf(stderr, "\n"); + } + else if(dmp::rank() == 0) + { + OMNITRACE_VERBOSE_F(0, + "trace data is empty. File '%s' will not be written...\n", + get_perfetto_output_filename().c_str()); } - ofs.close(); - if(get_verbose() >= 0) fprintf(stderr, "\n"); } // shutdown tasking before timemory is finalized, especially the roctracer thread-pool diff --git a/projects/rocprofiler-systems/tests/CMakeLists.txt b/projects/rocprofiler-systems/tests/CMakeLists.txt index 06f5c4b3d9..94fef5de91 100644 --- a/projects/rocprofiler-systems/tests/CMakeLists.txt +++ b/projects/rocprofiler-systems/tests/CMakeLists.txt @@ -135,6 +135,8 @@ function(OMNITRACE_ADD_TEST) if(NOT TEST_NUM_PROCS GREATER NUM_PROCS_REAL) set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${TEST_NUM_PROCS}) + list(APPEND TEST_LABELS mpi parallel-${TEST_NUM_PROCS}) + list(APPEND TEST_PROPERTIES PARALLEL_LEVEL ${TEST_NUM_PROCS}) else() set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 1) endif() @@ -428,6 +430,36 @@ omnitrace_add_test( RUN_ARGS 10 ${NUM_THREADS} 1000 ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF") +omnitrace_add_test( + SKIP_RUNTIME + NAME "mpi" + TARGET mpi-example + MPI ON + NUM_PROCS 4 + REWRITE_ARGS + -e + -v + 2 + --label + file + line + return + args + --min-instructions 
+ 0 + RUNTIME_ARGS + -e + -v + 1 + --label + file + line + return + args + --min-instructions + 0 + ENVIRONMENT "${_base_environment}") + omnitrace_add_test( NAME lulesh TARGET lulesh