Roctracer flush activity fix + perfetto.cfg (#317)
* Fix roctracer_flush_activity
- invoke roctracer_flush_activity() before disabling domains
* create comp::roctracer::flush()
- real issue was the global state when roctracer_flush_activity() was called
* formatting
* Update lib/omnitrace/library/components/roctracer.hpp
- provide definition of comp::roctracer::flush when OMNITRACE_USE_ROCTRACER is not defined
* omnitrace.cfg -> perfetto.cfg
- rename provided perfetto config file (omnitrace.cfg) to perfetto.cfg to avoid confusion
* Update lib/core
- gpu.hpp: defines for OMNITRACE_USE_{HIP,ROCTRACER,ROCPROFILER,ROCM_SMI}
- gpu.cpp
- include core/hip_runtime.hpp
- fix serialization of hipDeviceProp_t
- add hip_runtime.hpp
- ensure proper inclusion of hip_runtime.h
- add rccl.hpp
- ensure proper inclusion of rccl.h
* Update lib/omnitrace/library
- rcclp.cpp
- update includes for rccl
- roctracer.hpp
- update includes for hip_runtime
- components/comm_data.hpp
- update includes for rccl
- components/rcclp.hpp
- update includes for rccl
* Update bin/omnitrace-avail/avail.cpp
- update includes for hip_runtime
* Update examples/rccl/CMakeLists.txt
- fix find_package for rccl when CI enabled
* Update CMakeLists.txt
- set cmake policy CMP0135 to NEW for cmake >= 3.24
- With CMP0135 set to NEW, ExternalProject_Add + URL download method stamps extracted files with the extraction time (DOWNLOAD_EXTRACT_TIMESTAMP defaults to false)
* Update timemory submodule
* Update pybind11 submodule
* Update pybind11 submodule
* Update lib/core/rccl.hpp
- include rccl.h only if OMNITRACE_USE_RCCL > 0
* Update lib/core/{gpu,hip_runtime}.hpp
* Update lib/core/gpu.cpp
- reintroduce some ppdefs
* Update lib/core/gpu.cpp
- fix ifdef on OMNITRACE_HIP_VERSION
* Update lib/core/gpu.cpp
- only enforce the static assert on OMNITRACE_HIP_VERSION_MINOR for HIP 5.x and newer (HIP 4.x and older report unreliable minor versions)
* Update lib/core/gpu.cpp
- fix ifdef on OMNITRACE_HIP_VERSION
* Update lib/core/config.cpp
- disable OMNITRACE_PERFETTO_COMBINE_TRACES by default
* Update lib/core/perfetto.cpp
- if the perfetto temp file cannot be opened, fall back to returning the data from ReadTraceBlocking()
* Update lib/core/config.*
- flush tmpfile before closing
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
aeb346b6d6
Коммит
7bc50f5a0a
@@ -71,6 +71,10 @@ set(CMAKE_POSITION_INDEPENDENT_CODE
|
||||
ON
|
||||
CACHE BOOL "Build position independent code")
|
||||
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif()
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "")
|
||||
set(CMAKE_BUILD_TYPE
|
||||
Release
|
||||
@@ -366,8 +370,8 @@ if(NOT OMNITRACE_USE_ROCPROFILER)
|
||||
endif()
|
||||
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/omnitrace.cfg
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/omnitrace.cfg
|
||||
${PROJECT_SOURCE_DIR}/perfetto.cfg
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/perfetto.cfg
|
||||
COPYONLY)
|
||||
|
||||
configure_file(
|
||||
@@ -381,7 +385,7 @@ configure_file(
|
||||
|
||||
install(
|
||||
FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/setup-env.sh
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/omnitrace.cfg
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/perfetto.cfg
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}
|
||||
COMPONENT setup)
|
||||
|
||||
|
||||
+1
-1
@@ -325,7 +325,7 @@ Enable `traced` and `perfetto` in the background:
|
||||
```shell
|
||||
pkill traced
|
||||
traced --background
|
||||
perfetto --out ./omnitrace-perfetto.proto --txt -c ${OMNITRACE_ROOT}/share/omnitrace.cfg --background
|
||||
perfetto --out ./omnitrace-perfetto.proto --txt -c ${OMNITRACE_ROOT}/share/perfetto.cfg --background
|
||||
```
|
||||
|
||||
> ***NOTE: if the perfetto tools were installed by omnitrace, replace `traced` with `omnitrace-perfetto-traced` and***
|
||||
|
||||
@@ -31,7 +31,10 @@ if(NOT hip_FOUND)
|
||||
return()
|
||||
endif()
|
||||
|
||||
if("${CMAKE_PROJECT_NAME}" STREQUAL "omnitrace" AND "$ENV{OMNITRACE_CI}")
|
||||
if("${CMAKE_PROJECT_NAME}" STREQUAL "omnitrace"
|
||||
AND ("$ENV{OMNITRACE_CI}"
|
||||
OR OMNITRACE_CI
|
||||
OR OMNITRACE_BUILD_CI))
|
||||
find_package(rccl QUIET) # avoid generating warning in CI
|
||||
else()
|
||||
find_package(rccl)
|
||||
|
||||
поставляемый
+1
-1
Submodule external/pybind11 updated: ad0de0f5a6...1a917f1852
поставляемый
+1
-1
Submodule external/timemory updated: ace5bc4dc9...2a1bcba0ca
@@ -1,6 +1,4 @@
|
||||
# perfetto --out OUTPUT_FILE --txt -c omnitrace.cfg
|
||||
# 5 minute trace, but can be stopped prematurely.
|
||||
duration_ms: 300000
|
||||
# perfetto --out OUTPUT_FILE --txt -c perfetto.cfg
|
||||
write_into_file: true
|
||||
|
||||
# One buffer allocated within the central tracing binary for the entire trace,
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "api.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/gpu.hpp"
|
||||
#include "core/hip_runtime.hpp"
|
||||
#include "library/rocprofiler.hpp"
|
||||
|
||||
#include <timemory/components.hpp>
|
||||
@@ -62,12 +63,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0
|
||||
# include <hip/hip_runtime.h>
|
||||
#elif !defined(OMNITRACE_USE_HIP)
|
||||
# define OMNITRACE_USE_HIP 0
|
||||
#endif
|
||||
|
||||
#if defined(TIMEMORY_UNIX)
|
||||
# include <sys/ioctl.h> // ioctl() and TIOCGWINSZ
|
||||
# include <unistd.h> // for STDOUT_FILENO
|
||||
|
||||
@@ -205,7 +205,7 @@ OMNITRACE_KOKKOSP_KERNEL_LOGGER = false
|
||||
OMNITRACE_PAPI_EVENTS = PAPI_TOT_CYC
|
||||
OMNITRACE_PERFETTO_BACKEND = inprocess
|
||||
OMNITRACE_PERFETTO_BUFFER_SIZE_KB = 1024000
|
||||
OMNITRACE_PERFETTO_COMBINE_TRACES = true
|
||||
OMNITRACE_PERFETTO_COMBINE_TRACES = false
|
||||
OMNITRACE_PERFETTO_FILE = perfetto-trace.proto
|
||||
OMNITRACE_PERFETTO_FILL_POLICY = discard
|
||||
OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB = 4096
|
||||
|
||||
@@ -29,10 +29,12 @@ set(core_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/dynamic_library.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/exception.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/gpu.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/hip_runtime.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/locking.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/mproc.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perf.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rccl.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/redirect.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/state.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/timemory.hpp
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include "perfetto.hpp"
|
||||
#include "utility.hpp"
|
||||
|
||||
#include <asm-generic/errno-base.h>
|
||||
#include <timemory/backends/capability.hpp>
|
||||
#include <timemory/backends/dmp.hpp>
|
||||
#include <timemory/backends/mpi.hpp>
|
||||
@@ -650,8 +651,7 @@ configure_settings(bool _init)
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_PERFETTO_COMBINE_TRACES",
|
||||
"Combine Perfetto traces. If not explicitly set, it will "
|
||||
"default to the value of OMNITRACE_COLLAPSE_PROCESSES",
|
||||
_config->get<bool>("collapse_processes"), "perfetto", "data",
|
||||
"advanced");
|
||||
false, "perfetto", "data", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_PERFETTO_ROCTRACER_PER_STREAM",
|
||||
@@ -2527,9 +2527,34 @@ tmp_file::fopen(const char* _mode)
|
||||
return (file != nullptr && fd > 0);
|
||||
}
|
||||
|
||||
bool
|
||||
tmp_file::flush()
|
||||
{
|
||||
if(stream.is_open())
|
||||
{
|
||||
stream.flush();
|
||||
}
|
||||
else if(file != nullptr)
|
||||
{
|
||||
int _ret = fflush(file);
|
||||
int _cnt = 0;
|
||||
while(_ret == EAGAIN || _ret == EINTR)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{ 100 });
|
||||
_ret = fflush(file);
|
||||
if(++_cnt > 10) break;
|
||||
}
|
||||
return (_ret == 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
tmp_file::close()
|
||||
{
|
||||
flush();
|
||||
|
||||
if(stream.is_open())
|
||||
{
|
||||
stream.close();
|
||||
|
||||
@@ -394,6 +394,7 @@ struct tmp_file
|
||||
|
||||
bool open(std::ios::openmode = std::ios::binary | std::ios::in | std::ios::out);
|
||||
bool fopen(const char* = "r+");
|
||||
bool flush();
|
||||
bool close();
|
||||
bool remove();
|
||||
|
||||
|
||||
+229
-16
@@ -20,6 +20,8 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "common/defines.h"
|
||||
|
||||
#if !defined(OMNITRACE_USE_ROCM_SMI)
|
||||
# define OMNITRACE_USE_ROCM_SMI 0
|
||||
#endif
|
||||
@@ -28,15 +30,17 @@
|
||||
# define OMNITRACE_USE_HIP 0
|
||||
#endif
|
||||
|
||||
#include "core/hip_runtime.hpp"
|
||||
|
||||
#if OMNITRACE_USE_HIP > 0
|
||||
# if !defined(TIMEMORY_USE_HIP)
|
||||
# define TIMEMORY_USE_HIP 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "gpu.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "defines.hpp"
|
||||
#include "gpu.hpp"
|
||||
|
||||
#include <timemory/manager.hpp>
|
||||
|
||||
@@ -45,10 +49,19 @@
|
||||
#endif
|
||||
|
||||
#if OMNITRACE_USE_HIP > 0
|
||||
# include <hip/hip_runtime.h>
|
||||
# include <hip/hip_runtime_api.h>
|
||||
# include <timemory/components/hip/backends.hpp>
|
||||
|
||||
static_assert(OMNITRACE_HIP_VERSION_MAJOR == HIP_VERSION_MAJOR,
|
||||
"OMNITRACE_HIP_VERSION_MAJOR (detected by cmake) != HIP_VERSION_MAJOR "
|
||||
"(from <hip/hip_version.h>)");
|
||||
|
||||
# if OMNITRACE_HIP_VERSION_MAJOR >= 5
|
||||
// HIP versions 4.x and older have unreliable values for HIP_VERSION_MINOR
|
||||
static_assert(OMNITRACE_HIP_VERSION_MINOR == HIP_VERSION_MINOR,
|
||||
"OMNITRACE_HIP_VERSION_MINOR (detected by cmake) != HIP_VERSION_MINOR "
|
||||
"(from <hip/hip_version.h>)");
|
||||
# endif
|
||||
|
||||
# if !defined(OMNITRACE_HIP_RUNTIME_CALL)
|
||||
# define OMNITRACE_HIP_RUNTIME_CALL(err) \
|
||||
{ \
|
||||
@@ -107,6 +120,91 @@ rsmi_init()
|
||||
return _rsmi_init;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if OMNITRACE_HIP_VERSION >= 60000
|
||||
template <typename ArchiveT, typename ArgT,
|
||||
std::enable_if_t<!std::is_pointer<ArgT>::value, int> = 0>
|
||||
void
|
||||
device_prop_serialize(ArchiveT& archive, const char* name, const ArgT& arg)
|
||||
{
|
||||
namespace cereal = tim::cereal;
|
||||
using cereal::make_nvp;
|
||||
archive(make_nvp(name, arg));
|
||||
}
|
||||
|
||||
template <typename ArchiveT, typename ArgT, size_t N>
|
||||
void
|
||||
device_prop_serialize(ArchiveT& archive, const char* name, ArgT arg[N])
|
||||
{
|
||||
if constexpr(!std::is_same<ArgT, char>::value &&
|
||||
!std::is_same<ArgT, const char>::value)
|
||||
{
|
||||
namespace cereal = tim::cereal;
|
||||
using cereal::make_nvp;
|
||||
auto data = std::array<int, N>{};
|
||||
for(size_t i = 0; i < N; ++i)
|
||||
data[i] = arg[i];
|
||||
archive(make_nvp(name, data));
|
||||
}
|
||||
else
|
||||
{
|
||||
device_prop_serialize(archive, name, std::string{ arg });
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
device_prop_serialize(ArchiveT& archive, const char* name, hipUUID_t arg)
|
||||
{
|
||||
constexpr auto N = sizeof(arg.bytes);
|
||||
namespace cereal = tim::cereal;
|
||||
using cereal::make_nvp;
|
||||
auto data = std::array<char, N + 1>{};
|
||||
data.fill('\0');
|
||||
for(size_t i = 0; i < N; ++i)
|
||||
data[i] = arg.bytes[i];
|
||||
auto str_v = std::string_view{ data.data() };
|
||||
auto str = std::string{ str_v }.substr(0, str_v.find('\0'));
|
||||
archive(make_nvp(name, str));
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
device_prop_serialize(ArchiveT& archive, const char* name, hipDeviceArch_t arg)
|
||||
{
|
||||
namespace cereal = tim::cereal;
|
||||
using cereal::make_nvp;
|
||||
|
||||
# define OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(NAME) \
|
||||
{ \
|
||||
auto val = arg.NAME; \
|
||||
archive(make_nvp(#NAME, val)); \
|
||||
}
|
||||
|
||||
archive.setNextName(name);
|
||||
archive.startNode();
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt32Atomics)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalFloatAtomicExch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt32Atomics)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSharedFloatAtomicExch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasFloatAtomicAdd)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt64Atomics)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt64Atomics)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasDoubles)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasWarpVote)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasWarpBallot)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasWarpShuffle)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasFunnelShift)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasThreadFenceSystem)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSyncThreadsExt)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSurfaceFuncs)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(has3dGrid)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasDynamicParallelism)
|
||||
archive.finishNode();
|
||||
|
||||
# undef OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
int
|
||||
@@ -161,11 +259,10 @@ template <typename ArchiveT>
|
||||
void
|
||||
add_hip_device_metadata(ArchiveT& ar)
|
||||
{
|
||||
#if OMNITRACE_USE_HIP > 0
|
||||
namespace cereal = tim::cereal;
|
||||
using cereal::make_nvp;
|
||||
using intvec_t = std::vector<int>;
|
||||
|
||||
#if OMNITRACE_USE_HIP > 0
|
||||
int _device_count = 0;
|
||||
int _current_device = 0;
|
||||
hipError_t _device_count_err = hipGetDeviceCount(&_device_count);
|
||||
@@ -183,12 +280,6 @@ add_hip_device_metadata(ArchiveT& ar)
|
||||
|
||||
if(_current_device_err != hipSuccess || _device_count == 0) return;
|
||||
|
||||
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(NAME) \
|
||||
ar(make_nvp(#NAME, _device_prop.NAME));
|
||||
|
||||
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP_ARRAY(NAME, ...) \
|
||||
ar(make_nvp(NAME, __VA_ARGS__));
|
||||
|
||||
ar.setNextName("hip_device_properties");
|
||||
ar.startNode();
|
||||
ar.makeArray();
|
||||
@@ -205,6 +296,16 @@ add_hip_device_metadata(ArchiveT& ar)
|
||||
OMNITRACE_HIP_RUNTIME_CALL(hipRuntimeGetVersion(&_runtime_version));
|
||||
|
||||
ar.startNode();
|
||||
|
||||
# if OMNITRACE_HIP_VERSION < 60000
|
||||
using intvec_t = std::vector<int>;
|
||||
|
||||
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(NAME) \
|
||||
ar(make_nvp(#NAME, _device_prop.NAME));
|
||||
|
||||
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP_ARRAY(NAME, ...) \
|
||||
ar(make_nvp(NAME, __VA_ARGS__));
|
||||
|
||||
ar(make_nvp("name", std::string{ _device_prop.name }));
|
||||
ar(make_nvp("driver_version", _driver_version));
|
||||
ar(make_nvp("runtime_version", _runtime_version));
|
||||
@@ -215,11 +316,11 @@ add_hip_device_metadata(ArchiveT& ar)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalConstMem)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockRate)
|
||||
|
||||
# if OMNITRACE_HIP_VERSION >= 5000
|
||||
# if OMNITRACE_HIP_VERSION >= 50000
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize)
|
||||
# endif
|
||||
# endif
|
||||
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock)
|
||||
@@ -247,7 +348,6 @@ add_hip_device_metadata(ArchiveT& ar)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciBusID)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computeMode)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computeMode)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gcnArch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gcnArchName)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard)
|
||||
@@ -259,8 +359,118 @@ add_hip_device_metadata(ArchiveT& ar)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(asicRevision)
|
||||
# else
|
||||
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(NAME) \
|
||||
device_prop_serialize(ar, #NAME, _device_prop.NAME);
|
||||
|
||||
const char* _compute_mode_descr[] = {
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(name)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(uuid)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(luid)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(luidDeviceNodeMask)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(warpSize)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memPitch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsDim)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxGridSize)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockRate)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalConstMem)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(major)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(minor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(textureAlignment)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(texturePitchAlignment)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(deviceOverlap)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(integrated)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computeMode)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1D)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DMipmap)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLinear)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2D)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DMipmap)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLinear)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DGather)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture3D)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture3DAlt)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemap)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLayered)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLayered)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemapLayered)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface1D)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface2D)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface3D)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface1DLayered)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface2DLayered)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemap)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemapLayered)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(surfaceAlignment)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciBusID)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDomainID)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(tccDriver)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(asyncEngineCount)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(unifiedAddressing)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(persistingL2CacheMaxSize)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(streamPrioritiesSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(globalL1CacheSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(localL1CacheSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerMultiprocessor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(regsPerMultiprocessor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(managedMemory)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(multiGpuBoardGroupID)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hostNativeAtomicSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(singleToDoublePrecisionPerfRatio)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computePreemptionSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(canUseHostPointerForRegisteredMem)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlockOptin)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxBlocksPerMultiProcessor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(accessPolicyMaxWindowSize)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(reservedSharedMemPerBlock)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hostRegisterSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sparseHipArraySupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hostRegisterReadOnlySupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(timelineSemaphoreInteropSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryPoolsSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMASupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAFlushWritesOptions)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAWritesOrdering)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryPoolSupportedHandleTypes)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(deferredMappingHipArraySupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(ipcEventSupported)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clusterLaunch)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(unifiedFunctionPointers)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gcnArchName)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(arch)
|
||||
// OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hdpMemFlushCntl)
|
||||
// OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hdpRegFlushCntl)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedFunc)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedGridDim)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedBlockDim)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedSharedMem)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(isLargeBar)
|
||||
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(asicRevision)
|
||||
# endif
|
||||
|
||||
constexpr auto _compute_mode_descr = std::array<const char*, 6>{
|
||||
"Default (multiple host threads can use ::hipSetDevice() with device "
|
||||
"simultaneously)",
|
||||
"Exclusive (only one host thread in one process is able to use "
|
||||
@@ -271,8 +481,11 @@ add_hip_device_metadata(ArchiveT& ar)
|
||||
"Unknown",
|
||||
nullptr
|
||||
};
|
||||
|
||||
auto _compute_mode = std::min<int>(_device_prop.computeMode, 5);
|
||||
ar(make_nvp("computeModeDescription",
|
||||
std::string{ _compute_mode_descr[_device_prop.computeMode] }));
|
||||
std::string{ _compute_mode_descr.at(_compute_mode) }));
|
||||
|
||||
ar.finishNode();
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/defines.hpp"
|
||||
|
||||
#if defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0
|
||||
|
||||
# if defined(HIP_INCLUDE_HIP_HIP_RUNTIME_H) || \
|
||||
defined(HIP_INCLUDE_HIP_HIP_RUNTIME_API_H)
|
||||
# error \
|
||||
"include core/hip_runtime.hpp before <hip/hip_runtime.h> or <hip/hip_runtime_api.h>"
|
||||
# endif
|
||||
|
||||
# define HIP_PROF_HIP_API_STRING 1
|
||||
|
||||
// following must be included before <roctracer_hip.h> for ROCm 6.0+
|
||||
# if OMNITRACE_HIP_VERSION >= 60000
|
||||
# if defined(USE_PROF_API)
|
||||
# undef USE_PROF_API
|
||||
# endif
|
||||
# include <hip/hip_runtime.h>
|
||||
# include <hip/hip_runtime_api.h>
|
||||
// must be included after hip_runtime_api.h
|
||||
# include <hip/hip_deprecated.h>
|
||||
// must be included after hip_runtime_api.h
|
||||
# include <hip_ostream_ops.h>
|
||||
// must be included after hip_runtime_api.h
|
||||
# include <hip/amd_detail/hip_prof_str.h>
|
||||
# else
|
||||
# include <hip/hip_runtime.h>
|
||||
# include <hip/hip_runtime_api.h>
|
||||
# endif
|
||||
|
||||
# include <hip/hip_version.h>
|
||||
#endif
|
||||
@@ -174,6 +174,15 @@ post_process(tim::manager* _timemory_manager, bool& _perfetto_output_error)
|
||||
{
|
||||
_tmp_file->close();
|
||||
FILE* _fdata = fopen(_tmp_file->filename.c_str(), "rb");
|
||||
|
||||
if(!_fdata)
|
||||
{
|
||||
OMNITRACE_VERBOSE(
|
||||
-1, "Error! perfetto temp trace file '%s' could not be read",
|
||||
_tmp_file->filename.c_str());
|
||||
return char_vec_t{ tracing_session->ReadTraceBlocking() };
|
||||
}
|
||||
|
||||
fseek(_fdata, 0, SEEK_END);
|
||||
size_t _fnum_elem = ftell(_fdata);
|
||||
fseek(_fdata, 0, SEEK_SET); // same as rewind(f);
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/defines.hpp"
|
||||
#include "core/hip_runtime.hpp"
|
||||
|
||||
#if defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0 && \
|
||||
defined(OMNITRACE_USE_RCCL) && OMNITRACE_USE_RCCL > 0
|
||||
# if OMNITRACE_HIP_VERSION == 0 || OMNITRACE_HIP_VERSION >= 50200
|
||||
# include <rccl/rccl.h>
|
||||
# else
|
||||
# include <rccl.h>
|
||||
# endif
|
||||
#endif
|
||||
@@ -726,6 +726,13 @@ omnitrace_finalize_hidden(void)
|
||||
}
|
||||
}
|
||||
|
||||
if(get_use_roctracer())
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(1, "Flushing roctracer...\n");
|
||||
// ensure that roctracer is flushed before setting the state to finalized
|
||||
comp::roctracer::flush();
|
||||
}
|
||||
|
||||
set_state(State::Finalized);
|
||||
|
||||
push_enable_sampling_on_child_threads(false);
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "core/common.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/defines.hpp"
|
||||
#include "core/rccl.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
|
||||
@@ -37,14 +38,6 @@
|
||||
|
||||
#include <optional>
|
||||
|
||||
#if defined(OMNITRACE_USE_RCCL)
|
||||
# if OMNITRACE_HIP_VERSION == 0 || OMNITRACE_HIP_VERSION >= 50200
|
||||
# include <rccl/rccl.h>
|
||||
# else
|
||||
# include <rccl.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(OMNITRACE_USE_MPI)
|
||||
# include <mpi.h>
|
||||
#endif
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "core/common.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/defines.hpp"
|
||||
#include "core/rccl.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/components/comm_data.hpp"
|
||||
@@ -32,12 +33,6 @@
|
||||
#include <timemory/api/macros.hpp>
|
||||
#include <timemory/components/macros.hpp>
|
||||
|
||||
#if OMNITRACE_HIP_VERSION == 0 || OMNITRACE_HIP_VERSION >= 50200
|
||||
# include <rccl/rccl.h>
|
||||
#else
|
||||
# include <rccl.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
|
||||
#include <chrono>
|
||||
#include <roctracer.h>
|
||||
|
||||
#define HIP_PROF_HIP_API_STRING 1
|
||||
@@ -272,6 +273,41 @@ roctracer::setup(void* table, bool on_load_trace)
|
||||
OMNITRACE_VERBOSE_F(1, "roctracer is setup\n");
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::flush()
|
||||
{
|
||||
auto wait_for_activity_flush_completion = []() {
|
||||
uint16_t nitr = 0;
|
||||
while(roctracer_activity_count() > 0 && nitr++ < 10)
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{ 100 });
|
||||
};
|
||||
|
||||
// a flush may already be happening
|
||||
wait_for_activity_flush_completion();
|
||||
|
||||
if(roctracer_activity_count() == 0)
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(2, "executing roctracer_flush_activity()...\n");
|
||||
OMNITRACE_ROCTRACER_CALL(roctracer_flush_activity());
|
||||
// wait to make sure flush completes
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{ 100 });
|
||||
wait_for_activity_flush_completion();
|
||||
}
|
||||
else
|
||||
{
|
||||
OMNITRACE_CI_FAIL(true,
|
||||
"roctracer_activity_count() != 0 (== %li). "
|
||||
"roctracer::shutdown() most likely called during abort",
|
||||
roctracer_activity_count().load());
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE_F(2, "executing hip_exec_activity_callbacks(0..%zu)\n",
|
||||
thread_info::get_peak_num_threads());
|
||||
// make sure all async operations are executed
|
||||
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
|
||||
hip_exec_activity_callbacks(i);
|
||||
}
|
||||
|
||||
void
|
||||
roctracer::shutdown()
|
||||
{
|
||||
@@ -282,16 +318,11 @@ roctracer::shutdown()
|
||||
tim::storage<comp::roctracer_data>::instance()->reset();
|
||||
return;
|
||||
}
|
||||
|
||||
roctracer_is_setup() = false;
|
||||
|
||||
OMNITRACE_VERBOSE_F(1, "shutting down roctracer...\n");
|
||||
|
||||
OMNITRACE_VERBOSE_F(2, "executing hip_exec_activity_callbacks(0..%zu)\n",
|
||||
thread_info::get_peak_num_threads());
|
||||
// make sure all async operations are executed
|
||||
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
|
||||
hip_exec_activity_callbacks(i);
|
||||
|
||||
// callback for hsa
|
||||
OMNITRACE_VERBOSE_F(2, "executing %zu roctracer_shutdown_routines...\n",
|
||||
roctracer_shutdown_routines().size());
|
||||
@@ -352,19 +383,6 @@ roctracer::shutdown()
|
||||
roctracer_disable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
|
||||
}
|
||||
|
||||
if(roctracer_activity_count() == 0)
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(2, "executing roctracer_flush_activity()...\n");
|
||||
OMNITRACE_ROCTRACER_CALL(roctracer_flush_activity());
|
||||
}
|
||||
else
|
||||
{
|
||||
OMNITRACE_CI_FAIL(true,
|
||||
"roctracer_activity_count() != 0 (== %li). "
|
||||
"roctracer::shutdown() most likely called during abort",
|
||||
roctracer_activity_count().load());
|
||||
}
|
||||
|
||||
OMNITRACE_VERBOSE_F(1, "roctracer is shutdown\n");
|
||||
}
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ struct roctracer
|
||||
|
||||
static bool is_setup();
|
||||
static void setup(void* hsa_api_table, bool on_load_trace = false);
|
||||
static void flush();
|
||||
static void shutdown();
|
||||
static void add_setup(const std::string&, std::function<void()>&&);
|
||||
static void add_shutdown(const std::string&, std::function<void()>&&);
|
||||
@@ -77,6 +78,10 @@ inline void
|
||||
roctracer::setup(void*, bool)
|
||||
{}
|
||||
|
||||
inline void
|
||||
roctracer::flush()
|
||||
{}
|
||||
|
||||
inline void
|
||||
roctracer::shutdown()
|
||||
{}
|
||||
|
||||
@@ -26,17 +26,12 @@
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/defines.hpp"
|
||||
#include "core/dynamic_library.hpp"
|
||||
#include "core/rccl.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
|
||||
#include <timemory/timemory.hpp>
|
||||
|
||||
#if OMNITRACE_HIP_VERSION == 0 || OMNITRACE_HIP_VERSION >= 50200
|
||||
# include <rccl/rccl.h>
|
||||
#else
|
||||
# include <rccl.h>
|
||||
#endif
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
@@ -44,8 +44,6 @@
|
||||
#include <cstdint>
|
||||
#include <tuple>
|
||||
|
||||
#define HIP_PROF_HIP_API_STRING 1
|
||||
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hip.h>
|
||||
#include <roctracer_roctx.h>
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/hip_runtime.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "library/components/roctracer.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
|
||||
@@ -28,6 +28,18 @@ omnitrace_add_test(
|
||||
uniform_int_distribution
|
||||
ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=ON")
|
||||
|
||||
omnitrace_add_test(
|
||||
SKIP_REWRITE SKIP_RUNTIME
|
||||
NAME transpose-two-kernels
|
||||
TARGET transpose
|
||||
MPI OFF
|
||||
GPU ON
|
||||
NUM_PROCS 1
|
||||
RUN_ARGS 1 2 2
|
||||
ENVIRONMENT
|
||||
"${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_ROCTRACER_HSA_ACTIVITY=OFF;OMNITRACE_ROCTRACER_HSA_API=OFF"
|
||||
)
|
||||
|
||||
omnitrace_add_test(
|
||||
SKIP_BASELINE SKIP_RUNTIME
|
||||
NAME transpose-loops
|
||||
|
||||
Ссылка в новой задаче
Block a user