Rename Omnitrace to ROCm Systems Profiler (#4)

The Omnitrace program is being renamed to ROCm Systems Profiler.

Full name: "ROCm Systems Profiler"
Package name: "rocprofiler-systems"
Binary / Library names: "rocprof-sys-*"

---------
Co-authored-by: Xuan Chen <xuchen@amd.com>
Signed-off-by: David Galiffi <David.Galiffi@amd.com>

[ROCm/rocprofiler-systems commit: d07bf508a9]
Этот коммит содержится в:
David Galiffi
2024-10-15 11:20:40 -04:00
коммит произвёл GitHub
родитель c444ba6131
Коммит 489eda995d
420 изменённых файлов: 10418 добавлений и 9914 удалений
+208 -208
Просмотреть файл
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
// Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@@ -22,17 +22,17 @@
#include "common/defines.h"
#if !defined(OMNITRACE_USE_ROCM_SMI)
# define OMNITRACE_USE_ROCM_SMI 0
#if !defined(ROCPROFSYS_USE_ROCM_SMI)
# define ROCPROFSYS_USE_ROCM_SMI 0
#endif
#if !defined(OMNITRACE_USE_HIP)
# define OMNITRACE_USE_HIP 0
#if !defined(ROCPROFSYS_USE_HIP)
# define ROCPROFSYS_USE_HIP 0
#endif
#include "core/hip_runtime.hpp"
#if OMNITRACE_USE_HIP > 0
#if ROCPROFSYS_USE_HIP > 0
# if !defined(TIMEMORY_USE_HIP)
# define TIMEMORY_USE_HIP 1
# endif
@@ -44,19 +44,19 @@
#include <timemory/manager.hpp>
#if OMNITRACE_USE_ROCM_SMI > 0
#if ROCPROFSYS_USE_ROCM_SMI > 0
# include <rocm_smi/rocm_smi.h>
#endif
#if OMNITRACE_USE_HIP > 0
#if ROCPROFSYS_USE_HIP > 0
# include <timemory/components/hip/backends.hpp>
# if !defined(OMNITRACE_HIP_RUNTIME_CALL)
# define OMNITRACE_HIP_RUNTIME_CALL(err) \
# if !defined(ROCPROFSYS_HIP_RUNTIME_CALL)
# define ROCPROFSYS_HIP_RUNTIME_CALL(err) \
{ \
if(err != ::tim::hip::success_v && (int) err != 0) \
{ \
OMNITRACE_THROW( \
ROCPROFSYS_THROW( \
"[%s:%d] Warning! HIP API call failed with code %i :: %s\n", \
__FILE__, __LINE__, (int) err, hipGetErrorString(err)); \
} \
@@ -64,7 +64,7 @@
# endif
#endif
namespace omnitrace
namespace rocprofsys
{
namespace gpu
{
@@ -72,9 +72,9 @@ namespace
{
namespace scope = ::tim::scope;
#if OMNITRACE_USE_ROCM_SMI > 0
# define OMNITRACE_ROCM_SMI_CALL(ERROR_CODE) \
::omnitrace::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__)
#if ROCPROFSYS_USE_ROCM_SMI > 0
# define ROCPROFSYS_ROCM_SMI_CALL(ERROR_CODE) \
::rocprofsys::gpu::check_rsmi_error(ERROR_CODE, __FILE__, __LINE__)
void
check_rsmi_error(rsmi_status_t _code, const char* _file, int _line)
@@ -83,11 +83,11 @@ check_rsmi_error(rsmi_status_t _code, const char* _file, int _line)
const char* _msg = nullptr;
auto _err = rsmi_status_string(_code, &_msg);
if(_err != RSMI_STATUS_SUCCESS)
OMNITRACE_THROW("rsmi_status_string failed. No error message available. "
"Error code %i originated at %s:%i\n",
static_cast<int>(_code), _file, _line);
OMNITRACE_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast<int>(_code),
_msg);
ROCPROFSYS_THROW("rsmi_status_string failed. No error message available. "
"Error code %i originated at %s:%i\n",
static_cast<int>(_code), _file, _line);
ROCPROFSYS_THROW("[%s:%i] Error code %i :: %s", _file, _line, static_cast<int>(_code),
_msg);
}
bool
@@ -96,11 +96,11 @@ rsmi_init()
auto _rsmi_init = []() {
try
{
OMNITRACE_ROCM_SMI_CALL(::rsmi_init(0));
ROCPROFSYS_ROCM_SMI_CALL(::rsmi_init(0));
} catch(std::exception& _e)
{
OMNITRACE_BASIC_VERBOSE(1, "Exception thrown initializing rocm-smi: %s\n",
_e.what());
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing rocm-smi: %s\n",
_e.what());
return false;
}
return true;
@@ -110,7 +110,7 @@ rsmi_init()
}
#endif
#if OMNITRACE_HIP_VERSION >= 60000
#if ROCPROFSYS_HIP_VERSION >= 60000
template <typename ArchiveT, typename ArgT,
std::enable_if_t<!std::is_pointer<ArgT>::value, int> = 0>
void
@@ -164,7 +164,7 @@ device_prop_serialize(ArchiveT& archive, const char* name, hipDeviceArch_t arg)
namespace cereal = tim::cereal;
using cereal::make_nvp;
# define OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(NAME) \
# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(NAME) \
{ \
auto val = arg.NAME; \
archive(make_nvp(#NAME, val)); \
@@ -172,26 +172,26 @@ device_prop_serialize(ArchiveT& archive, const char* name, hipDeviceArch_t arg)
archive.setNextName(name);
archive.startNode();
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt32Atomics)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalFloatAtomicExch)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt32Atomics)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSharedFloatAtomicExch)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasFloatAtomicAdd)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt64Atomics)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt64Atomics)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasDoubles)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasWarpVote)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasWarpBallot)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasWarpShuffle)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasFunnelShift)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasThreadFenceSystem)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSyncThreadsExt)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasSurfaceFuncs)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(has3dGrid)
OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH(hasDynamicParallelism)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt32Atomics)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalFloatAtomicExch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt32Atomics)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSharedFloatAtomicExch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasFloatAtomicAdd)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasGlobalInt64Atomics)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSharedInt64Atomics)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasDoubles)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasWarpVote)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasWarpBallot)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasWarpShuffle)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasFunnelShift)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasThreadFenceSystem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSyncThreadsExt)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasSurfaceFuncs)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(has3dGrid)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH(hasDynamicParallelism)
archive.finishNode();
# undef OMNITRACE_SERIALIZE_HIP_DEVICE_ARCH
# undef ROCPROFSYS_SERIALIZE_HIP_DEVICE_ARCH
}
#endif
} // namespace
@@ -199,7 +199,7 @@ device_prop_serialize(ArchiveT& archive, const char* name, hipDeviceArch_t arg)
int
hip_device_count()
{
#if OMNITRACE_USE_HIP > 0
#if ROCPROFSYS_USE_HIP > 0
return ::tim::hip::device_count();
#else
return 0;
@@ -209,17 +209,17 @@ hip_device_count()
int
rsmi_device_count()
{
#if OMNITRACE_USE_ROCM_SMI > 0
#if ROCPROFSYS_USE_ROCM_SMI > 0
if(!rsmi_init()) return 0;
static auto _num_devices = []() {
uint32_t _v = 0;
try
{
OMNITRACE_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_v));
ROCPROFSYS_ROCM_SMI_CALL(rsmi_num_monitor_devices(&_v));
} catch(std::exception& _e)
{
OMNITRACE_BASIC_VERBOSE(
ROCPROFSYS_BASIC_VERBOSE(
1, "Exception thrown getting the rocm-smi devices: %s\n", _e.what());
}
return _v;
@@ -234,10 +234,10 @@ rsmi_device_count()
int
device_count()
{
#if OMNITRACE_USE_ROCM_SMI > 0
#if ROCPROFSYS_USE_ROCM_SMI > 0
// store as static since calls after rsmi_shutdown will return zero
return rsmi_device_count();
#elif OMNITRACE_USE_HIP > 0
#elif ROCPROFSYS_USE_HIP > 0
return ::tim::hip::device_count();
#else
return 0;
@@ -251,7 +251,7 @@ add_hip_device_metadata(ArchiveT& ar)
namespace cereal = tim::cereal;
using cereal::make_nvp;
#if OMNITRACE_USE_HIP > 0
#if ROCPROFSYS_USE_HIP > 0
int _device_count = 0;
int _current_device = 0;
hipError_t _device_count_err = hipGetDeviceCount(&_device_count);
@@ -263,7 +263,7 @@ add_hip_device_metadata(ArchiveT& ar)
scope::destructor _dtor{ [_current_device, _current_device_err]() {
if(_current_device_err == hipSuccess)
{
OMNITRACE_HIP_RUNTIME_CALL(hipSetDevice(_current_device));
ROCPROFSYS_HIP_RUNTIME_CALL(hipSetDevice(_current_device));
}
} };
@@ -279,20 +279,20 @@ add_hip_device_metadata(ArchiveT& ar)
auto _device_prop = hipDeviceProp_t{};
int _driver_version = 0;
int _runtime_version = 0;
OMNITRACE_HIP_RUNTIME_CALL(hipSetDevice(dev));
OMNITRACE_HIP_RUNTIME_CALL(hipGetDeviceProperties(&_device_prop, dev));
OMNITRACE_HIP_RUNTIME_CALL(hipDriverGetVersion(&_driver_version));
OMNITRACE_HIP_RUNTIME_CALL(hipRuntimeGetVersion(&_runtime_version));
ROCPROFSYS_HIP_RUNTIME_CALL(hipSetDevice(dev));
ROCPROFSYS_HIP_RUNTIME_CALL(hipGetDeviceProperties(&_device_prop, dev));
ROCPROFSYS_HIP_RUNTIME_CALL(hipDriverGetVersion(&_driver_version));
ROCPROFSYS_HIP_RUNTIME_CALL(hipRuntimeGetVersion(&_runtime_version));
ar.startNode();
# if OMNITRACE_HIP_VERSION < 60000
# if ROCPROFSYS_HIP_VERSION < 60000
using intvec_t = std::vector<int>;
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(NAME) \
# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(NAME) \
ar(make_nvp(#NAME, _device_prop.NAME));
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP_ARRAY(NAME, ...) \
# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP_ARRAY(NAME, ...) \
ar(make_nvp(NAME, __VA_ARGS__));
ar(make_nvp("name", std::string{ _device_prop.name }));
@@ -301,162 +301,162 @@ add_hip_device_metadata(ArchiveT& ar)
ar(make_nvp("capability.major_version", _device_prop.major));
ar(make_nvp("capability.minor_version", _device_prop.minor));
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalConstMem)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockRate)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalConstMem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockRate)
# if OMNITRACE_HIP_VERSION >= 50000
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize)
# if ROCPROFSYS_HIP_VERSION >= 50000
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize)
# endif
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(warpSize)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP_ARRAY(
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(warpSize)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP_ARRAY(
"maxThreadsDim",
intvec_t{ _device_prop.maxThreadsDim[0], _device_prop.maxThreadsDim[1],
_device_prop.maxThreadsDim[2] })
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP_ARRAY("maxGridSize",
intvec_t{ _device_prop.maxGridSize[0],
_device_prop.maxGridSize[1],
_device_prop.maxGridSize[2] })
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memPitch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(textureAlignment)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(integrated)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDomainID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciBusID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computeMode)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gcnArch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gcnArchName)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(asicRevision)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP_ARRAY(
"maxGridSize",
intvec_t{ _device_prop.maxGridSize[0], _device_prop.maxGridSize[1],
_device_prop.maxGridSize[2] })
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memPitch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(textureAlignment)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(integrated)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDomainID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciBusID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(computeMode)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gcnArch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gcnArchName)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(asicRevision)
# else
# define OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(NAME) \
# define ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(NAME) \
device_prop_serialize(ar, #NAME, _device_prop.NAME);
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(name)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(uuid)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(luid)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(luidDeviceNodeMask)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(warpSize)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memPitch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsDim)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxGridSize)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockRate)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(totalConstMem)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(major)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(minor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(textureAlignment)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(texturePitchAlignment)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(deviceOverlap)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(integrated)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computeMode)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1D)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DMipmap)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLinear)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2D)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DMipmap)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLinear)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DGather)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture3D)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture3DAlt)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemap)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLayered)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLayered)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemapLayered)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface1D)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface2D)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface3D)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface1DLayered)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurface2DLayered)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemap)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemapLayered)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(surfaceAlignment)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciBusID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pciDomainID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(tccDriver)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(asyncEngineCount)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(unifiedAddressing)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(persistingL2CacheMaxSize)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(streamPrioritiesSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(globalL1CacheSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(localL1CacheSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerMultiprocessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(regsPerMultiprocessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(managedMemory)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(multiGpuBoardGroupID)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hostNativeAtomicSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(singleToDoublePrecisionPerfRatio)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(computePreemptionSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(canUseHostPointerForRegisteredMem)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlockOptin)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxBlocksPerMultiProcessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(accessPolicyMaxWindowSize)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(reservedSharedMemPerBlock)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hostRegisterSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(sparseHipArraySupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hostRegisterReadOnlySupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(timelineSemaphoreInteropSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryPoolsSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMASupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAFlushWritesOptions)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAWritesOrdering)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(memoryPoolSupportedHandleTypes)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(deferredMappingHipArraySupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(ipcEventSupported)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clusterLaunch)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(unifiedFunctionPointers)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(gcnArchName)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(arch)
// OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hdpMemFlushCntl)
// OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(hdpRegFlushCntl)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedFunc)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedGridDim)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedBlockDim)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedSharedMem)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(isLargeBar)
OMNITRACE_SERIALIZE_HIP_DEVICE_PROP(asicRevision)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(name)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(uuid)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(luid)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(luidDeviceNodeMask)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalGlobalMem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(regsPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(warpSize)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memPitch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsDim)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxGridSize)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockRate)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(totalConstMem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(major)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(minor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(textureAlignment)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(texturePitchAlignment)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(deviceOverlap)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(multiProcessorCount)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(kernelExecTimeoutEnabled)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(integrated)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(canMapHostMemory)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(computeMode)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1D)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DMipmap)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLinear)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2D)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DMipmap)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLinear)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DGather)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture3D)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture3DAlt)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemap)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture1DLayered)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTexture2DLayered)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxTextureCubemapLayered)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface1D)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface2D)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface3D)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface1DLayered)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurface2DLayered)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemap)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSurfaceCubemapLayered)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(surfaceAlignment)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentKernels)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(ECCEnabled)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciBusID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDeviceID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pciDomainID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(tccDriver)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(asyncEngineCount)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(unifiedAddressing)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryClockRate)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryBusWidth)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(l2CacheSize)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(persistingL2CacheMaxSize)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxThreadsPerMultiProcessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(streamPrioritiesSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(globalL1CacheSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(localL1CacheSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerMultiprocessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(regsPerMultiprocessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(managedMemory)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(isMultiGpuBoard)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(multiGpuBoardGroupID)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hostNativeAtomicSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(singleToDoublePrecisionPerfRatio)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccess)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(concurrentManagedAccess)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(computePreemptionSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(canUseHostPointerForRegisteredMem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeLaunch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceLaunch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sharedMemPerBlockOptin)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(pageableMemoryAccessUsesHostPageTables)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(directManagedMemAccessFromHost)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxBlocksPerMultiProcessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(accessPolicyMaxWindowSize)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(reservedSharedMemPerBlock)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hostRegisterSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(sparseHipArraySupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hostRegisterReadOnlySupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(timelineSemaphoreInteropSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryPoolsSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMASupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAFlushWritesOptions)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gpuDirectRDMAWritesOrdering)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(memoryPoolSupportedHandleTypes)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(deferredMappingHipArraySupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(ipcEventSupported)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clusterLaunch)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(unifiedFunctionPointers)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(gcnArchName)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(maxSharedMemoryPerMultiProcessor)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(clockInstructionRate)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(arch)
// ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hdpMemFlushCntl)
// ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(hdpRegFlushCntl)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedFunc)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedGridDim)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedBlockDim)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(cooperativeMultiDeviceUnmatchedSharedMem)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(isLargeBar)
ROCPROFSYS_SERIALIZE_HIP_DEVICE_PROP(asicRevision)
# endif
const auto _compute_mode_descr = std::array<const char*, 6>{
@@ -487,15 +487,15 @@ add_hip_device_metadata()
{
if(device_count() == 0) return;
OMNITRACE_METADATA([](auto& ar) {
ROCPROFSYS_METADATA([](auto& ar) {
try
{
add_hip_device_metadata(ar);
} catch(std::runtime_error& _e)
{
OMNITRACE_VERBOSE(2, "%s\n", _e.what());
ROCPROFSYS_VERBOSE(2, "%s\n", _e.what());
}
});
}
} // namespace gpu
} // namespace omnitrace
} // namespace rocprofsys