perfetto mpi + mpi example (#49)
[ROCm/rocprofiler-systems commit: 6b7b6e46cf]
Цей коміт міститься в:
зафіксовано
GitHub
джерело
2bb6fd0cfb
коміт
b3c5a6f048
@@ -10,6 +10,7 @@ add_subdirectory(parallel-overhead)
|
||||
add_subdirectory(code-coverage)
|
||||
add_subdirectory(user-api)
|
||||
add_subdirectory(openmp)
|
||||
add_subdirectory(mpi)
|
||||
|
||||
if(CMAKE_VERSION VERSION_LESS 3.16 AND (NOT DEFINED LULESH_BUILD_KOKKOS
|
||||
OR LULESH_BUILD_KOKKOS))
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# Build script for the MPI example executable (mpi-example, built from mpi.cpp).
#
# The example is optional: when no C++ MPI implementation is found, a warning is
# emitted and this directory is skipped without defining the target.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

project(omnitrace-mpi-example LANGUAGES CXX)

# Only the C++ bindings are used (MPI::MPI_CXX is linked below), so request that
# component explicitly rather than whichever languages the enclosing build enabled.
find_package(MPI COMPONENTS CXX)
if(NOT MPI_FOUND OR NOT TARGET MPI::MPI_CXX)
    message(AUTHOR_WARNING "MPI could not be found. Cannot build mpi-example target")
    return()
endif()

add_executable(mpi-example mpi.cpp)

# mpi.cpp uses `if constexpr`, which needs C++17. Request it on the target so a
# standalone build (without omnitrace::omnitrace-compile-options) still compiles.
target_compile_features(mpi-example PRIVATE cxx_std_17)

# Pick up the project's shared compile options when built as part of omnitrace.
if(TARGET omnitrace::omnitrace-compile-options)
    target_link_libraries(mpi-example PRIVATE omnitrace::omnitrace-compile-options)
endif()

target_link_libraries(mpi-example PRIVATE MPI::MPI_CXX)

# When this is not the top-level project, place the executable in the top-level
# binary directory.
if(NOT "${CMAKE_PROJECT_NAME}" STREQUAL "${PROJECT_NAME}")
    set_target_properties(mpi-example PROPERTIES RUNTIME_OUTPUT_DIRECTORY
                                                 ${CMAKE_BINARY_DIR})
endif()
|
||||
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
Copyright (c) 2015-2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <array>
#include <cfloat>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <type_traits>
#include <vector>
|
||||
|
||||
// File-scope mutex and the matching lock alias.
// NOTE(review): neither is referenced by the code visible in this file section;
// presumably intended for serializing output -- confirm or remove.
static std::mutex print_lock{};
|
||||
using auto_lock_t = std::unique_lock<std::mutex>;
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
// Label used as the "[name]" prefix of every printed message; main() sets it to
// argv[0] with everything up to the last '/' removed.
std::string _name = {};
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
void
|
||||
all2all(int _rank)
|
||||
{
|
||||
static_assert(N > 0, "Error! N must be greater than zero!");
|
||||
|
||||
auto _mt = std::mt19937_64{ size_t(_rank + 100) };
|
||||
auto _dist = []() {
|
||||
if constexpr(std::is_integral<Tp>::value)
|
||||
{
|
||||
return std::uniform_int_distribution<Tp>(1, N * N);
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::uniform_real_distribution<Tp>(1.0, N * N);
|
||||
}
|
||||
}();
|
||||
|
||||
auto _get_values_str = [](const auto& _data) {
|
||||
std::stringstream _ss{};
|
||||
for(auto&& itr : _data)
|
||||
_ss << ", " << std::setw(6) << std::setprecision(2) << std::fixed << itr;
|
||||
return _ss.str().substr(1);
|
||||
};
|
||||
|
||||
std::array<Tp, N> values_sent = {};
|
||||
std::array<Tp, N> values_recv = {};
|
||||
for(size_t i = 0; i < N; ++i)
|
||||
values_sent[i] = _dist(_mt);
|
||||
|
||||
if(_rank == 0)
|
||||
printf("[%s][%i] values sent (# = %zu) :: %s.\n", _name.c_str(), _rank,
|
||||
values_sent.size(), _get_values_str(values_sent).c_str());
|
||||
|
||||
auto _dtype = MPI_INT;
|
||||
if(std::is_same<Tp, long>::value)
|
||||
_dtype = MPI_LONG;
|
||||
else if(std::is_same<Tp, float>::value)
|
||||
_dtype = MPI_FLOAT;
|
||||
else if(std::is_same<Tp, double>::value)
|
||||
_dtype = MPI_DOUBLE;
|
||||
|
||||
MPI_Alltoall(&values_sent[_rank], 1, _dtype, &values_recv[_rank], 1, _dtype,
|
||||
MPI_COMM_WORLD);
|
||||
|
||||
if(_rank == 0)
|
||||
printf("[%s][%i] values recv (# = %zu) :: %s.\n", _name.c_str(), _rank,
|
||||
values_sent.size(), _get_values_str(values_recv).c_str());
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
int rank = 0;
|
||||
int size = 1;
|
||||
int nitr = 1;
|
||||
if(argc > 1) nitr = atoi(argv[2]);
|
||||
|
||||
_name = argv[0];
|
||||
auto _pos = _name.find_last_of('/');
|
||||
if(_pos < _name.length()) _name = _name.substr(_pos + 1);
|
||||
|
||||
printf("[%s] Number of iterations: %i\n", _name.c_str(), nitr);
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
for(int i = 0; i < nitr; ++i)
|
||||
{
|
||||
all2all<int, 3>(rank);
|
||||
all2all<long, 4>(rank);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
all2all<float, 5>(rank);
|
||||
all2all<double, 6>(rank);
|
||||
}
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1190,41 +1190,64 @@ omnitrace_finalize_hidden(void)
|
||||
OMNITRACE_VERBOSE_F(3, "Stopping the blocking perfetto trace sessions...\n");
|
||||
tracing_session->StopBlocking();
|
||||
|
||||
using char_vec_t = std::vector<char>;
|
||||
OMNITRACE_VERBOSE_F(3, "Getting the trace data...\n");
|
||||
std::vector<char> trace_data{ tracing_session->ReadTraceBlocking() };
|
||||
|
||||
if(trace_data.empty())
|
||||
{
|
||||
fprintf(stderr,
|
||||
"[%s]> trace data is empty. File '%s' will not be written...\n",
|
||||
OMNITRACE_FUNCTION, get_perfetto_output_filename().c_str());
|
||||
return;
|
||||
}
|
||||
// Write the trace into a file.
|
||||
if(get_verbose() >= 0)
|
||||
fprintf(stderr,
|
||||
"[%s][%s]|%i> Outputting '%s' (%.2f KB / %.2f MB / %.2f GB)... ",
|
||||
TIMEMORY_PROJECT_NAME, OMNITRACE_FUNCTION, dmp::rank(),
|
||||
get_perfetto_output_filename().c_str(),
|
||||
static_cast<double>(trace_data.size()) / units::KB,
|
||||
static_cast<double>(trace_data.size()) / units::MB,
|
||||
static_cast<double>(trace_data.size()) / units::GB);
|
||||
std::ofstream ofs{};
|
||||
if(!tim::filepath::open(ofs, get_perfetto_output_filename(),
|
||||
std::ios::out | std::ios::binary))
|
||||
{
|
||||
fprintf(stderr, "\n[%s]> Error opening '%s'...\n", OMNITRACE_FUNCTION,
|
||||
get_perfetto_output_filename().c_str());
|
||||
_perfetto_output_error = true;
|
||||
}
|
||||
else
|
||||
#if defined(TIMEMORY_USE_MPI) && TIMEMORY_USE_MPI > 0
|
||||
using perfetto_mpi_get_t = tim::operation::finalize::mpi_get<char_vec_t, true>;
|
||||
|
||||
char_vec_t _trace_data{ tracing_session->ReadTraceBlocking() };
|
||||
std::vector<char_vec_t> _rank_data = {};
|
||||
auto _combine = [](char_vec_t& _dst, const char_vec_t& _src) -> char_vec_t& {
|
||||
_dst.reserve(_dst.size() + _src.size());
|
||||
for(auto&& itr : _src)
|
||||
_dst.emplace_back(itr);
|
||||
return _dst;
|
||||
};
|
||||
|
||||
perfetto_mpi_get_t{ _rank_data, _trace_data, _combine };
|
||||
auto trace_data = char_vec_t{};
|
||||
for(auto& itr : _rank_data)
|
||||
trace_data =
|
||||
(trace_data.empty()) ? std::move(itr) : _combine(trace_data, itr);
|
||||
#else
|
||||
char_vec_t trace_data{ tracing_session->ReadTraceBlocking() };
|
||||
#endif
|
||||
|
||||
if(!trace_data.empty())
|
||||
{
|
||||
// Write the trace into a file.
|
||||
if(get_verbose() >= 0) fprintf(stderr, "Done\n");
|
||||
ofs.write(&trace_data[0], trace_data.size());
|
||||
if(get_verbose() >= 0)
|
||||
fprintf(stderr,
|
||||
"[%s][%s]|%i> Outputting '%s' (%.2f KB / %.2f MB / %.2f GB)... ",
|
||||
TIMEMORY_PROJECT_NAME, OMNITRACE_FUNCTION, dmp::rank(),
|
||||
get_perfetto_output_filename().c_str(),
|
||||
static_cast<double>(trace_data.size()) / units::KB,
|
||||
static_cast<double>(trace_data.size()) / units::MB,
|
||||
static_cast<double>(trace_data.size()) / units::GB);
|
||||
std::ofstream ofs{};
|
||||
if(!tim::filepath::open(ofs, get_perfetto_output_filename(),
|
||||
std::ios::out | std::ios::binary))
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(0, "Error opening '%s'...\n",
|
||||
get_perfetto_output_filename().c_str());
|
||||
_perfetto_output_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Write the trace into a file.
|
||||
if(get_verbose() >= 0) fprintf(stderr, "Done\n");
|
||||
ofs.write(&trace_data[0], trace_data.size());
|
||||
}
|
||||
ofs.close();
|
||||
if(get_verbose() >= 0) fprintf(stderr, "\n");
|
||||
}
|
||||
else if(dmp::rank() == 0)
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(0,
|
||||
"trace data is empty. File '%s' will not be written...\n",
|
||||
get_perfetto_output_filename().c_str());
|
||||
}
|
||||
ofs.close();
|
||||
if(get_verbose() >= 0) fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
// shutdown tasking before timemory is finalized, especially the roctracer thread-pool
|
||||
|
||||
@@ -135,6 +135,8 @@ function(OMNITRACE_ADD_TEST)
|
||||
if(NOT TEST_NUM_PROCS GREATER NUM_PROCS_REAL)
|
||||
set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG}
|
||||
${TEST_NUM_PROCS})
|
||||
list(APPEND TEST_LABELS mpi parallel-${TEST_NUM_PROCS})
|
||||
list(APPEND TEST_PROPERTIES PARALLEL_LEVEL ${TEST_NUM_PROCS})
|
||||
else()
|
||||
set(COMMAND_PREFIX ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 1)
|
||||
endif()
|
||||
@@ -428,6 +430,36 @@ omnitrace_add_test(
|
||||
RUN_ARGS 10 ${NUM_THREADS} 1000
|
||||
ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF")
|
||||
|
||||
# Register the "mpi" test for the mpi-example target, requesting MPI with 4
# processes. The rewrite and runtime argument lists are identical except for the
# value following -v (2 vs. 1).
# NOTE(review): RUNTIME_ARGS is supplied together with SKIP_RUNTIME -- confirm
# against omnitrace_add_test whether those arguments are used.
omnitrace_add_test(
    SKIP_RUNTIME
    NAME "mpi"
    TARGET mpi-example
    MPI ON
    NUM_PROCS 4
    REWRITE_ARGS -e -v 2 --label file line return args --min-instructions 0
    RUNTIME_ARGS -e -v 1 --label file line return args --min-instructions 0
    ENVIRONMENT "${_base_environment}")
|
||||
|
||||
omnitrace_add_test(
|
||||
NAME lulesh
|
||||
TARGET lulesh
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача