Files
rocm-systems/src/include/nvtx.h
T
Bertan Dogancay b617aecc31 Implement ROCTX (#1094)
* Implement roctx
2024-02-27 15:46:15 -07:00

99 строки
3.6 KiB
C++

/*************************************************************************
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_NVTX_H_
#define NCCL_NVTX_H_
#include "nvtx3/nvtx3.hpp"
#include "roctx.h"
// NVTX3_CONSTEXPR_IF_CPP14 expands to `constexpr` when the compiler reports
// C++14-level constexpr support (__cpp_constexpr >= 201304L) and to nothing
// otherwise. If the macro is already defined, that definition is kept as-is
// rather than being redefined (which could silently drop `constexpr`).
#ifndef NVTX3_CONSTEXPR_IF_CPP14
#if __cpp_constexpr >= 201304L
#define NVTX3_CONSTEXPR_IF_CPP14 constexpr
#else
#define NVTX3_CONSTEXPR_IF_CPP14
#endif
#endif
// Static schema IDs provided by NCCL. Every value must stay unique.
// NVTX3_FUNC_WITH_PARAMS selects one of these by pasting its ID argument
// after the `NVTX_SID_` prefix.
#define NVTX_SID_CommInitRank   0
#define NVTX_SID_CommInitAll    1
#define NVTX_SID_CommDestroy    2  // same schema as NVTX_SID_CommInitRank
#define NVTX_SID_CommAbort      3  // same schema as NVTX_SID_CommInitRank
#define NVTX_SID_AllGather      4
#define NVTX_SID_AllReduce      5
#define NVTX_SID_AllToAll       6
#define NVTX_SID_AllToAllv      7
#define NVTX_SID_Broadcast      8
#define NVTX_SID_Gather         9
#define NVTX_SID_MSCCL         10
#define NVTX_SID_ReduceScatter 11
#define NVTX_SID_Reduce        12
#define NVTX_SID_Scatter       13
#define NVTX_SID_Send          14
#define NVTX_SID_Recv          15
// Static schema ID for the reduction operation.
// The offset must not coincide with any NVTX_SID_* value: those are added to
// the same NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START base when a schema
// ID is formed. The previous offset (11) duplicated NVTX_SID_ReduceScatter;
// 16 is the first offset above the highest NVTX_SID_* value (NVTX_SID_Recv).
// When adding new NVTX_SID_* entries, do not reuse this offset.
// The expansion is parenthesized so it behaves as a single operand inside
// larger expressions.
#define NVTX_PAYLOAD_ENTRY_NCCL_REDOP (NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START + 16)
// NVTX domain handle used by NCCL; declared here, defined in another
// translation unit.
extern nvtxDomainHandle_t const ncclNvtxDomainHandle;
// Tag type identifying the NVTX domain. It is passed as the template argument
// to the nvtx3 domain-scoped utilities used in this header; `name` holds the
// domain name string.
struct nccl_domain {
  static constexpr const char* name = "NCCL";
};
// Describes one static NVTX payload schema and registers it with the domain
// obtained from nvtx3::domain::get<nccl_domain>() as soon as it is constructed.
class payload_schema {
 private:
  // Attribute record handed to nvtxPayloadSchemaRegister. The initializers are
  // positional: the field mask comes first, the remaining values follow the
  // member order of nvtxPayloadSchemaAttr_t (declared outside this header).
  // The constructor then fills in name, entries, numEntries and schemaId.
  nvtxPayloadSchemaAttr_t schema_attr{
    NVTX_PAYLOAD_SCHEMA_ATTR_TYPE |
      NVTX_PAYLOAD_SCHEMA_ATTR_ENTRIES |
      NVTX_PAYLOAD_SCHEMA_ATTR_NUM_ENTRIES |
      NVTX_PAYLOAD_SCHEMA_ATTR_STATIC_SIZE |
      NVTX_PAYLOAD_SCHEMA_ATTR_SCHEMA_ID,
    nullptr,
    NVTX_PAYLOAD_SCHEMA_TYPE_STATIC,
    NVTX_PAYLOAD_SCHEMA_FLAG_NONE,
    nullptr,
    0,
    0,
    0};

 public:
  // A schema without entries and an ID cannot be constructed.
  payload_schema() = delete;

  // Fills the attribute record and registers the schema.
  // @param entries     array of schema entries; only the pointer is stored here
  //                    (presumably it must stay valid at least until the
  //                    registration call returns -- confirm against NVTX docs)
  // @param numEntries  number of elements in `entries`
  // @param schemaId    ID under which the schema is registered
  // @param schemaName  optional schema name; defaults to nullptr
  explicit payload_schema(const nvtxPayloadSchemaEntry_t entries[], size_t numEntries,
                          const uint64_t schemaId, const char* schemaName = nullptr) noexcept
  {
    schema_attr.schemaId = schemaId;
    schema_attr.numEntries = numEntries;
    schema_attr.entries = entries;
    schema_attr.name = schemaName;

    // The value returned by the registration call is intentionally not used.
    nvtxPayloadSchemaRegister(nvtx3::domain::get<nccl_domain>(), &schema_attr);
  }

  // Copying, moving and destruction are all compiler-generated.
  payload_schema(payload_schema const&) = default;
  payload_schema(payload_schema&&) = default;
  payload_schema& operator=(payload_schema const&) = default;
  payload_schema& operator=(payload_schema&&) = default;
  ~payload_schema() = default;
};
// Create a scoped (push/pop) profiling range that carries a payload.
// @param ID operation name; the schema ID used is `NVTX_SID_<ID>` added to
//           NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START (see `NVTX_SID_*`)
// @param S  schema: array of payload schema entries (its extent is taken with
//           std::extent, so it must be a true array, not a pointer)
// @param P  payload: struct object; its address and sizeof are attached
//
// The expansion consists of several declarations and already ends with `;`,
// so the macro must be used as a statement at block scope. The range object
// it declares is a local of the enclosing scope.
//
// HIP builds: the range is a roctx_scoped_range_in labelled "RCCL_<ID>".
// __HIP_PLATFORM_AMD__ is checked alongside the legacy macros so this branch
// is also selected where only the current platform macro is defined.
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
#define NVTX3_FUNC_WITH_PARAMS(ID, S, P) \
nvtxPayloadData_t nvtx3_bpl__[] = { \
{NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START + NVTX_SID_##ID, sizeof(P), &(P)}}; \
roctx_scoped_range_in const roctx_range__{S, nvtx3_bpl__, std::extent<decltype(S)>::value, "RCCL_" #ID};
#else
// Other builds: the schema (named after ID) and the enclosing function's name
// are registered once through function-local statics, then an nvtx3 scoped
// range in the nccl_domain is opened with the payload attached.
#define NVTX3_FUNC_WITH_PARAMS(ID, S, P) \
static const payload_schema schema{S, std::extent<decltype(S)>::value, \
NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START + NVTX_SID_##ID, #ID}; \
static ::nvtx3::v1::registered_string_in<nccl_domain> const nvtx3_func_name__{__func__}; \
nvtxPayloadData_t nvtx3_bpl__[] = { \
{NVTX_PAYLOAD_ENTRY_TYPE_SCHEMA_ID_STATIC_START + NVTX_SID_##ID, sizeof(P), &(P)}}; \
::nvtx3::v1::event_attributes const nvtx3_func_attr__{nvtx3_func_name__, nvtx3_bpl__}; \
::nvtx3::v1::scoped_range_in<nccl_domain> const nvtx3_range__{nvtx3_func_attr__};
#endif
// Declared here, defined in another translation unit.
// NOTE(review): judging by the name, this presumably registers the NVTX enum
// schemas (such as the reduction-operation one) -- confirm at the definition.
void initNvtxRegisteredEnums();
#endif