Merge remote-tracking branch 'nccl/master' into no-target-id
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
@@ -16,6 +16,8 @@ ncclResult_t bootstrapInit(ncclUniqueId* id, int rank, int nranks, void** commSt
|
||||
ncclResult_t bootstrapAllGather(void* commState, void* allData, int size);
|
||||
ncclResult_t bootstrapSend(void* commState, int peer, void* data, int size);
|
||||
ncclResult_t bootstrapRecv(void* commState, int peer, void* data, int size);
|
||||
ncclResult_t bootstrapRemAlloc(size_t size, int rank, void* commState, int* id, hipIpcMemHandle_t* ipc, void** ptr);
|
||||
ncclResult_t bootstrapRemFree(int id, int rank, void* commState);
|
||||
ncclResult_t bootstrapClose(void* commState);
|
||||
ncclResult_t bootstrapAbort(void* commState);
|
||||
#endif
|
||||
|
||||
@@ -24,7 +24,7 @@ static ncclResult_t collNetRegMr(void* comm, void* data, int size, int type, voi
|
||||
static ncclResult_t collNetDeregMr(void* comm, void* mhandle) { NCCLCHECK(ncclCollNet->deregMr(comm, mhandle)); return ncclSuccess; }
|
||||
static ncclResult_t collNetIallreduce(void* collComm, void* sendData, void* recvData, int count, ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request) {
|
||||
NCCLCHECK(ncclCollNet->iallreduce(collComm, sendData, recvData, count, dataType, redOp, sendMhandle, recvMhandle, request)); return ncclSuccess; }
|
||||
static ncclResult_t collNetFlush(void* collComm, void* data, int size, void* mhandle) { NCCLCHECK(ncclCollNet->flush(collComm, data, size, mhandle)); return ncclSuccess; }
|
||||
static ncclResult_t collNetIflush(void* collComm, void* data, int size, void* mhandle, void** request) { NCCLCHECK(ncclCollNet->iflush(collComm, data, size, mhandle, request)); return ncclSuccess; }
|
||||
static ncclResult_t collNetTest(void* request, int* done, int* size) { NCCLCHECK(ncclCollNet->test(request, done, size)); return ncclSuccess; }
|
||||
static ncclResult_t collNetCloseColl(void* collComm) { NCCLCHECK(ncclCollNet->closeColl(collComm)); return ncclSuccess; }
|
||||
static ncclResult_t collNetCloseListen(void* listenComm) { NCCLCHECK(ncclCollNet->closeListen(listenComm)); return ncclSuccess; }
|
||||
|
||||
@@ -8,63 +8,60 @@
|
||||
#ifndef NCCL_COLLECTIVES_H_
|
||||
#define NCCL_COLLECTIVES_H_
|
||||
|
||||
#define FUNC_INDEX_P2P (4+NCCL_NUM_FUNCTIONS*NCCL_NUM_ALGORITHMS*NCCL_NUM_PROTOCOLS*ncclNumTypes*ncclNumOps)
|
||||
#define FUNC_INDEX(coll, redop, dtype, al, pr) ((coll >= NCCL_NUM_FUNCTIONS) \
|
||||
? (coll-NCCL_NUM_FUNCTIONS+NCCL_NUM_FUNCTIONS*NCCL_NUM_ALGORITHMS*NCCL_NUM_PROTOCOLS*ncclNumTypes*ncclNumOps) \
|
||||
: ((((((coll)*ncclNumOps + (redop))*ncclNumTypes) + (dtype))*NCCL_NUM_ALGORITHMS+(al))*NCCL_NUM_PROTOCOLS+(pr)))
|
||||
#define FUNC_INDEX_P2P 1800
|
||||
#define FUNC_INDEX(func, redop, ncclType, al, pr) ((((((func)*ncclNumOps + (redop))*ncclNumTypes) + (ncclType))*NCCL_NUM_ALGORITHMS+(al))*NCCL_NUM_PROTOCOLS+(pr))
|
||||
|
||||
#define NCCL_COLL_NAME(coll, op, dtype) \
|
||||
coll##_##op##_##dtype
|
||||
#define NCCL_FUNC_NAME(func, algo, proto, redop, type) \
|
||||
ncclFunction_##func##_##algo##_##proto##_##redop##_##type
|
||||
|
||||
#define NCCL_KERN_NAME(coll, op, dtype) \
|
||||
coll##Kernel_##op##_##dtype
|
||||
#define NCCL_KERN_NAME(func, algo, proto, redop, type) \
|
||||
ncclKernel_##func##_##algo##_##proto##_##redop##_##type
|
||||
|
||||
#define NCCL_IMPL_NAME(func, algo, proto) \
|
||||
nccl##func##algo##proto
|
||||
|
||||
/* Declare all collective operations */
|
||||
#define DECL_COLL5(coll, op, dtype) \
|
||||
extern __device__ __attribute__((noinline)) void NCCL_COLL_NAME(coll, op, dtype)(struct CollectiveArgs* args); \
|
||||
extern __global__ void NCCL_KERN_NAME(coll, op, dtype)(struct ncclDevComm* comm); \
|
||||
#define DECL5(func, algo, proto, redop, type) \
|
||||
extern __device__ __attribute__((noinline)) void NCCL_FUNC_NAME(func, algo, proto, redop, type)(struct ncclWorkElem* args); \
|
||||
extern __global__ void NCCL_KERN_NAME(func, algo, proto, redop, type)(struct ncclWorkElem first); \
|
||||
|
||||
#define DECL_COLL4(coll, op, dtype) \
|
||||
DECL_COLL5(coll, op, dtype) \
|
||||
DECL_COLL5(coll##LL, op, dtype) \
|
||||
DECL_COLL5(coll##LL128, op, dtype)
|
||||
#define DECL4(func, algo, redop, type) \
|
||||
DECL5(func, algo, SIMPLE, redop, type) \
|
||||
DECL5(func, algo, LL, redop, type) \
|
||||
DECL5(func, algo, LL128, redop, type)
|
||||
|
||||
#define DECL_COLL3(coll, op, dtype) \
|
||||
DECL_COLL4(coll##Ring, op, dtype) \
|
||||
DECL_COLL4(coll##Tree, op, dtype) \
|
||||
DECL_COLL4(coll##CollNet, op, dtype)
|
||||
#define DECL3(func, redop, type) \
|
||||
DECL4(func, RING, redop, type) \
|
||||
DECL4(func, TREE, redop, type) \
|
||||
DECL4(func, COLLNET, redop, type)
|
||||
|
||||
#define DECL_COLL2(coll, op) \
|
||||
DECL_COLL3(coll, op, i8) \
|
||||
DECL_COLL3(coll, op, u8) \
|
||||
DECL_COLL3(coll, op, i32) \
|
||||
DECL_COLL3(coll, op, u32) \
|
||||
DECL_COLL3(coll, op, i64) \
|
||||
DECL_COLL3(coll, op, u64) \
|
||||
DECL_COLL3(coll, op, f16) \
|
||||
DECL_COLL3(coll, op, f32) \
|
||||
DECL_COLL3(coll, op, f64) \
|
||||
DECL_COLL3(coll, op, b16)
|
||||
#define DECL2(func, redop) \
|
||||
DECL3(func, redop, int8_t) \
|
||||
DECL3(func, redop, uint8_t) \
|
||||
DECL3(func, redop, int32_t) \
|
||||
DECL3(func, redop, uint32_t) \
|
||||
DECL3(func, redop, int64_t) \
|
||||
DECL3(func, redop, uint64_t) \
|
||||
DECL3(func, redop, half) \
|
||||
DECL3(func, redop, float) \
|
||||
DECL3(func, redop, double) \
|
||||
DECL3(func, redop, rccl_bfloat16)
|
||||
|
||||
#define DECL_COLL(coll) \
|
||||
DECL_COLL2(coll, sum) \
|
||||
DECL_COLL2(coll, prod) \
|
||||
DECL_COLL2(coll, min) \
|
||||
DECL_COLL2(coll, max)
|
||||
#define DECL(func) \
|
||||
DECL2(func, Sum) \
|
||||
DECL2(func, Prod) \
|
||||
DECL2(func, Min) \
|
||||
DECL2(func, Max)
|
||||
|
||||
#define DECL_ALL_COLLS \
|
||||
DECL_COLL2(ncclBroadcast, copy) \
|
||||
DECL_COLL(ncclReduce) \
|
||||
DECL_COLL2(ncclAllGather, copy) \
|
||||
DECL_COLL(ncclReduceScatter) \
|
||||
DECL_COLL(ncclAllReduce) \
|
||||
DECL_COLL5(ncclGather, copy, i8) \
|
||||
DECL_COLL5(ncclScatter, copy, i8) \
|
||||
DECL_COLL5(ncclAllToAll, copy, i8) \
|
||||
DECL_COLL5(ncclAllToAllv, copy, i8) \
|
||||
DECL_COLL5(ncclSendRecv, copy, i8) \
|
||||
#define DECL_ALL \
|
||||
DECL2(Broadcast, Sum) \
|
||||
DECL(Reduce) \
|
||||
DECL2(AllGather, Sum) \
|
||||
DECL(ReduceScatter) \
|
||||
DECL(AllReduce) \
|
||||
DECL5(SendRecv, RING, SIMPLE, Sum, int8_t) \
|
||||
|
||||
DECL_ALL_COLLS
|
||||
DECL_ALL
|
||||
|
||||
// CHUNKSIZE must be a multiple of SLICESIZE
|
||||
//#define ALLREDUCE_SLICESTEPS (NCCL_STEPS/4)
|
||||
@@ -84,13 +81,4 @@ DECL_ALL_COLLS
|
||||
#define REDUCE_SLICESTEPS 1
|
||||
#define REDUCE_CHUNKSTEPS 1
|
||||
#define SENDRECV_SLICEFACTOR 1
|
||||
#define GATHER_SLICESTEPS 4
|
||||
#define GATHER_CHUNKSTEPS 4
|
||||
#define SCATTER_SLICESTEPS 4
|
||||
#define SCATTER_CHUNKSTEPS 4
|
||||
#define ALLTOALL_SLICESTEPS 4
|
||||
#define ALLTOALL_CHUNKSTEPS 4
|
||||
#define ALLTOALLV_SLICESTEPS 4
|
||||
#define ALLTOALLV_CHUNKSTEPS 4
|
||||
|
||||
#endif
|
||||
|
||||
@@ -52,8 +52,8 @@ struct ncclRecvMem {
|
||||
struct {
|
||||
uint64_t tail;
|
||||
char pad1[CACHE_LINE_SIZE-sizeof(uint64_t)];
|
||||
char pad2[CACHE_LINE_SIZE-sizeof(uint64_t)];
|
||||
int sizesFifo[NCCL_STEPS];
|
||||
void* ptrsFifo[NCCL_STEPS];
|
||||
};
|
||||
char pad4[MEM_ALIGN];
|
||||
};
|
||||
@@ -67,6 +67,10 @@ struct ncclComm {
|
||||
struct ncclTopoSystem* topo;
|
||||
|
||||
void* bootstrap;
|
||||
// Bitmasks for ncclTransportP2pSetup
|
||||
int connect;
|
||||
uint32_t* connectSend;
|
||||
uint32_t* connectRecv;
|
||||
|
||||
int rank; // my rank in the communicator
|
||||
int nRanks; // number of GPUs in communicator
|
||||
@@ -131,8 +135,8 @@ struct ncclComm {
|
||||
int* intraCudaDevs;
|
||||
int* intraCGMode; // Whether we can use CUDA9 CGMD or not
|
||||
int* intraCC; // Only to check all have the same ComputeCap and disable CGMode if not
|
||||
struct ncclDevComm* args;
|
||||
struct ncclDevComm** argsptr;
|
||||
struct ncclWorkElem args;
|
||||
void* argsptr;
|
||||
|
||||
// Global proxy thread
|
||||
pthread_t proxyThread;
|
||||
@@ -140,8 +144,17 @@ struct ncclComm {
|
||||
|
||||
// Whether this communicator uses collNet
|
||||
int collNetSupport;
|
||||
|
||||
// Store info of async operations
|
||||
struct ncclInfo* asyncOps;
|
||||
int asyncOpCount;
|
||||
size_t asyncTotalSize;
|
||||
|
||||
//list of async p2p operation queued in a group semantics
|
||||
struct ncclP2Plist p2plist;
|
||||
struct ncclP2Plist* p2pSends;
|
||||
struct ncclP2Plist* p2pRecvs;
|
||||
int p2pSendCount;
|
||||
int p2pRecvCount;
|
||||
|
||||
// RCCL AllToAll/Scatter/Gather API
|
||||
bool alltoallDisable;
|
||||
|
||||
@@ -57,5 +57,6 @@ static __inline__ int ncclTypeSize(ncclDataType_t type) {
|
||||
#include "alloc.h"
|
||||
#include "utils.h"
|
||||
#include "param.h"
|
||||
#include "nvtx_stub.h"
|
||||
|
||||
#endif // end include guard
|
||||
|
||||
@@ -19,7 +19,7 @@ static int hexToInt(char c) {
|
||||
|
||||
#define CPU_SET_N_U32 (sizeof(cpu_set_t)/sizeof(uint32_t))
|
||||
|
||||
ncclResult_t ncclStrToCpuset(const char* str, cpu_set_t* mask) {
|
||||
static ncclResult_t ncclStrToCpuset(const char* str, cpu_set_t* mask) {
|
||||
uint32_t cpumasks[CPU_SET_N_U32];
|
||||
int m = CPU_SET_N_U32-1;
|
||||
cpumasks[m] = 0;
|
||||
@@ -42,7 +42,7 @@ ncclResult_t ncclStrToCpuset(const char* str, cpu_set_t* mask) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t ncclCpusetToStr(cpu_set_t* mask, char* str) {
|
||||
static ncclResult_t ncclCpusetToStr(cpu_set_t* mask, char* str) {
|
||||
int c = 0;
|
||||
uint8_t* m8 = (uint8_t*)mask;
|
||||
for (int o=sizeof(cpu_set_t)-1; o>=0; o--) {
|
||||
|
||||
@@ -23,8 +23,8 @@
|
||||
#endif
|
||||
|
||||
#define NCCL_NUM_FUNCTIONS 5 // SendRecv not included for now
|
||||
typedef enum { ncclCollBroadcast, ncclCollReduce, ncclCollAllGather, ncclCollReduceScatter, ncclCollAllReduce, ncclCollGather, ncclCollScatter, ncclCollAllToAll, ncclCollAllToAllv, ncclCollSendRecv} ncclFunc_t;
|
||||
extern const char* ncclFuncStr[NCCL_NUM_FUNCTIONS+4];
|
||||
typedef enum { ncclFuncBroadcast, ncclFuncReduce, ncclFuncAllGather, ncclFuncReduceScatter, ncclFuncAllReduce, ncclFuncSendRecv} ncclFunc_t;
|
||||
extern const char* ncclFuncStr[NCCL_NUM_FUNCTIONS];
|
||||
|
||||
#define NCCL_NUM_ALGORITHMS 3 // Tree/Ring/CollNet
|
||||
#define NCCL_ALGO_TREE 0
|
||||
@@ -59,6 +59,7 @@ union ncclLLFifoLine {
|
||||
#define WARP_SIZE 64
|
||||
#define MAXCHANNELS 32
|
||||
#define NCCL_MAX_NTHREADS 256
|
||||
#define NCCL_SIMPLE_MAX_NTHREADS NCCL_MAX_NTHREADS
|
||||
#define NCCL_LL_MAX_NTHREADS NCCL_MAX_NTHREADS
|
||||
#define NCCL_LL_LINES_PER_THREAD 8
|
||||
#ifdef TEST_LL_CLEANUP
|
||||
@@ -72,7 +73,7 @@ union ncclLLFifoLine {
|
||||
// Make sure the clean mask will last for at least NCCL_NSTEPS
|
||||
static_assert(NCCL_LL_CLEAN_MASK % NCCL_STEPS == 0, "Invalid NCCL_LL_CLEAN_MASK value");
|
||||
|
||||
#define NCCL_LL128_LINESIZE 64
|
||||
#define NCCL_LL128_LINESIZE 128
|
||||
#define NCCL_LL128_LINEELEMS (NCCL_LL128_LINESIZE/sizeof(uint64_t))
|
||||
#define NCCL_LL128_DATAELEMS (NCCL_LL128_LINEELEMS-1)
|
||||
|
||||
@@ -83,15 +84,12 @@ static_assert(NCCL_LL_CLEAN_MASK % NCCL_STEPS == 0, "Invalid NCCL_LL_CLEAN_MASK
|
||||
// to 3 dests. Use 70% for reduce and 30% for bcast.
|
||||
#define NCCL_LL128_SPLIT(nt) ((nt*7/(10*32))*32)
|
||||
|
||||
#define NCCL_LL128_SHMEM_ELEMS_PER_THREAD 8
|
||||
#define NCCL_LL128_SHMEM_ELEMS_PER_THREAD 2
|
||||
#define NCCL_LL128_SHMEM_SIZE (NCCL_LL128_SHMEM_ELEMS_PER_THREAD*NCCL_LL128_MAX_NTHREADS)
|
||||
|
||||
#define NCCL_DIRECT_GPU 0x01
|
||||
#define NCCL_DIRECT_NIC 0x10
|
||||
|
||||
#define MAXBARRIERS 2
|
||||
#define MAXWARPS (NCCL_MAX_NTHREADS/WARP_SIZE)
|
||||
|
||||
struct ncclConnInfo {
|
||||
// Regular comm mechanism
|
||||
char *buffs[NCCL_NUM_PROTOCOLS]; // Local for recv, remote for send
|
||||
@@ -99,9 +97,11 @@ struct ncclConnInfo {
|
||||
uint64_t *head; // Local for send, remote for recv
|
||||
|
||||
int direct; // Direct communication
|
||||
int shared; // Buffers are shared
|
||||
void **ptrExchange; // Pointer exchange for direct communication
|
||||
|
||||
int *fifo; // Size fifo for proxy
|
||||
int *sizesFifo; // Sizes fifo from GPU to proxy
|
||||
void* *ptrsFifo; // Buffer fifo from proxy to GPU
|
||||
|
||||
uint64_t step; // Keep where we are
|
||||
uint64_t llLastCleaning;
|
||||
@@ -110,7 +110,6 @@ struct ncclConnInfo {
|
||||
// allows software to explicitly initiate a flush read to HDP memory. See more
|
||||
// descriptions in primitives.h.
|
||||
uint32_t* next_hdp_reg; // Next GPU in ring (for p2p transport use only)
|
||||
uint32_t* curr_hdp_reg; // Curr GPU in ring (for rdma transport use only)
|
||||
};
|
||||
|
||||
struct ncclConnector {
|
||||
@@ -151,68 +150,53 @@ struct ncclDevComm;
|
||||
|
||||
#pragma pack(push) /* push current alignment to stack */
|
||||
#pragma pack(4) /* set alignment to 4 bytes boundary */
|
||||
/* CollectiveArgs + ncclColl are to be a power of two, currently 64 bytes, */
|
||||
/* to make sure reads to host from the CUDA kernel are aligned. */
|
||||
/* Make sure to adjust padding at the end of ncclColl. */
|
||||
struct CollectiveArgs {
|
||||
struct ncclDevComm* comm;
|
||||
uint64_t opCount;
|
||||
#define NCCL_MAX_WORK_ELEMENTS 2
|
||||
#define NCCL_MAX_GROUPS (NCCL_MAX_WORK_ELEMENTS*2)
|
||||
|
||||
/* ncclWork is to be a power of two, currently 8x64 bytes, */
|
||||
/* to make sure reads to host from the CUDA kernel are aligned. */
|
||||
/* Make sure to adjust padding at the end of ncclWorkElem. */
|
||||
struct ncclWorkElem {
|
||||
// Header
|
||||
struct ncclDevComm* comm;
|
||||
uint16_t nThreads;
|
||||
uint16_t funcIndex;
|
||||
uint16_t index;
|
||||
uint16_t active;
|
||||
|
||||
// local and remote input, output, and buffer
|
||||
const void * sendbuff;
|
||||
void * recvbuff;
|
||||
|
||||
// Op-specific fields. Make sure the common part stays the
|
||||
// same on all structs of the union
|
||||
uint64_t opCount;
|
||||
// Op-specific fields.
|
||||
union {
|
||||
struct {
|
||||
uint16_t nThreads;
|
||||
} common;
|
||||
struct {
|
||||
uint16_t nThreads;
|
||||
uint8_t bid;
|
||||
uint8_t nChannels;
|
||||
uint32_t root;
|
||||
size_t count;
|
||||
size_t lastChunkSize;
|
||||
} coll;
|
||||
struct {
|
||||
uint16_t nThreads;
|
||||
uint16_t unused;
|
||||
int32_t delta;
|
||||
size_t sendCount;
|
||||
size_t recvCount;
|
||||
} p2p;
|
||||
struct {
|
||||
uint16_t nThreads;
|
||||
uint32_t root;
|
||||
uint8_t bid;
|
||||
uint8_t nChannels;
|
||||
size_t count;
|
||||
size_t* extra;
|
||||
} a2av;
|
||||
};
|
||||
};
|
||||
struct ncclColl {
|
||||
union {
|
||||
} coll;
|
||||
struct {
|
||||
struct CollectiveArgs args;
|
||||
uint16_t funcIndex;
|
||||
uint16_t nextIndex;
|
||||
uint8_t active;
|
||||
};
|
||||
int data[0x10];
|
||||
size_t sendCount;
|
||||
size_t recvCount;
|
||||
int32_t delta;
|
||||
uint16_t nThreads;
|
||||
} p2p;
|
||||
uint64_t align[3];
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(struct ncclColl) == (0x10*sizeof(int)), "ncclColl must have a pow2 size");
|
||||
struct ncclWork {
|
||||
struct ncclWorkElem elems[NCCL_MAX_WORK_ELEMENTS];
|
||||
};
|
||||
static_assert(sizeof(struct ncclWorkElem) == (0x10*sizeof(int)), "ncclWorkElem must have a pow2 size");
|
||||
|
||||
struct ncclChannel {
|
||||
union {
|
||||
struct {
|
||||
struct ncclRing ring;
|
||||
struct ncclTree treeUp;
|
||||
struct ncclTree treeDn;
|
||||
struct ncclTree collTreeUp;
|
||||
struct ncclTree collTreeDn;
|
||||
struct ncclTree tree;
|
||||
struct ncclTree collTree;
|
||||
|
||||
int id;
|
||||
|
||||
@@ -221,16 +205,10 @@ struct ncclChannel {
|
||||
struct ncclPeer* devPeers;
|
||||
|
||||
// Operation list for aggregation
|
||||
struct ncclColl* collectives;
|
||||
size_t* collectivesExtra;
|
||||
int collStart;
|
||||
int collCount;
|
||||
int collFifoHead; // Only used by GPU
|
||||
int collFifoTail; // Only used by CPU
|
||||
struct ncclWork* workFifo;
|
||||
int workCount;
|
||||
uint64_t workFifoTail; // Only used by CPU
|
||||
|
||||
uint32_t* sync;
|
||||
uint64_t* barrier;
|
||||
uint64_t* barrier_next;
|
||||
#ifdef ENABLE_PROFILING
|
||||
struct timeval tvs;
|
||||
uint64_t sizes;
|
||||
@@ -288,9 +266,11 @@ struct ncclProf {
|
||||
|
||||
#ifdef ENABLE_COLLTRACE
|
||||
typedef enum {
|
||||
ncclCollTraceNotReady,
|
||||
ncclCollTraceKernelLaunchType,
|
||||
ncclCollTraceCollEndType,
|
||||
ncclCollTraceAbortType
|
||||
ncclCollTraceAbortType,
|
||||
ncclCollTraceDataType
|
||||
} ncclCollTraceDataType_t;
|
||||
|
||||
struct ncclCollTrace {
|
||||
@@ -304,7 +284,7 @@ struct ncclCollTrace {
|
||||
};
|
||||
static_assert(sizeof(struct ncclCollTrace) == 8*sizeof(int), "ncclCollTrace must have a pow2 size");
|
||||
|
||||
#define COLLTRACE_NUM_ITEMS 1024
|
||||
#define COLLTRACE_NUM_ITEMS 8192
|
||||
#endif
|
||||
|
||||
struct ncclDevComm {
|
||||
|
||||
@@ -19,5 +19,7 @@ ncclResult_t ncclBarrierEnqueue(struct ncclComm* comm);
|
||||
ncclResult_t ncclBarrierEnqueueWait(struct ncclComm* comm);
|
||||
ncclResult_t ncclEnqueueEvents(struct ncclComm* comm);
|
||||
ncclResult_t ncclSaveKernel(struct ncclInfo* info);
|
||||
ncclResult_t ncclSaveP2pKernel(struct ncclInfo* info);
|
||||
ncclResult_t ncclSaveCommKernels(struct ncclComm* comm);
|
||||
|
||||
#endif // End include guard
|
||||
|
||||
@@ -29,7 +29,7 @@ ncclResult_t ncclTopoComputeP2pChannels(struct ncclComm* comm);
|
||||
|
||||
// Query topology
|
||||
ncclResult_t ncclTopoGetNetDev(struct ncclTopoSystem* system, int rank, struct ncclTopoGraph* graph, int channelId, int* net);
|
||||
ncclResult_t ncclTopoCheckP2p(struct ncclTopoSystem* system, int64_t id1, int64_t id2, int* p2p, int *read);
|
||||
ncclResult_t ncclTopoCheckP2p(struct ncclTopoSystem* system, int64_t id1, int64_t id2, int* p2p, int *read, int* intermediateRank);
|
||||
ncclResult_t ncclTopoCheckGdr(struct ncclTopoSystem* topo, int64_t busId, int netDev, int read, int* useGdr);
|
||||
|
||||
// Set CPU affinity
|
||||
@@ -45,15 +45,16 @@ ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank);
|
||||
#define NCCL_TOPO_CPU_TYPE_ZEN 3
|
||||
#define NCCL_TOPO_CPU_TYPE_ROME 4
|
||||
ncclResult_t ncclTopoCpuType(struct ncclTopoSystem* system, int* arch, int* vendor, int* model);
|
||||
ncclResult_t ncclTopoGetNetCount(struct ncclTopoSystem* system, int* count);
|
||||
|
||||
#define NCCL_TOPO_MAX_NODES 256
|
||||
|
||||
// Init search. Needs to be done before calling ncclTopoCompute
|
||||
ncclResult_t ncclTopoSearchInit(struct ncclTopoSystem* system);
|
||||
|
||||
#define NCCL_TOPO_PATTERN_SPLIT_TREE_LOOP 1 // Split tree (send/recv from different ranks) always flowing in the same direction
|
||||
#define NCCL_TOPO_PATTERN_SPLIT_TREE 2 // Split tree (send/recv from different ranks) flowing in both directions
|
||||
#define NCCL_TOPO_PATTERN_TREE 3 // Simple tree (send/recv from same rank) flowing in both directions
|
||||
#define NCCL_TOPO_PATTERN_BALANCED_TREE 1 // Spread NIC traffic between two GPUs (Tree parent + one child on first GPU, second child on second GPU)
|
||||
#define NCCL_TOPO_PATTERN_SPLIT_TREE 2 // Spread NIC traffic between two GPUs (Tree parent on first GPU, tree children on the second GPU)
|
||||
#define NCCL_TOPO_PATTERN_TREE 3 // All NIC traffic going to/from the same GPU
|
||||
#define NCCL_TOPO_PATTERN_RING 4 // Ring
|
||||
struct ncclTopoGraph {
|
||||
// Input / output
|
||||
@@ -84,17 +85,16 @@ struct ncclTopoRanks {
|
||||
int ringSend[MAXCHANNELS];
|
||||
int ringPrev[MAXCHANNELS];
|
||||
int ringNext[MAXCHANNELS];
|
||||
int treeUpRecv[MAXCHANNELS];
|
||||
int treeUpSend[MAXCHANNELS];
|
||||
int treeDnRecv[MAXCHANNELS];
|
||||
int treeDnSend[MAXCHANNELS];
|
||||
int treeToParent[MAXCHANNELS];
|
||||
int treeToChild0[MAXCHANNELS];
|
||||
int treeToChild1[MAXCHANNELS];
|
||||
};
|
||||
|
||||
ncclResult_t ncclTopoPreset(struct ncclComm* comm,
|
||||
struct ncclTopoGraph* treeGraph, struct ncclTopoGraph* ringGraph, struct ncclTopoGraph* collNetGraph,
|
||||
struct ncclTopoRanks* topoRanks);
|
||||
|
||||
ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks,
|
||||
ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePatterns,
|
||||
struct ncclTopoRanks** allTopoRanks, int* rings, int gcn, int nnets);
|
||||
|
||||
ncclResult_t ncclTopoConnectCollNet(struct ncclComm* comm, struct ncclTopoGraph* collNetGraph, int rank);
|
||||
|
||||
@@ -20,8 +20,7 @@ typedef enum {
|
||||
ncclPatternTreeDown,
|
||||
ncclPatternTreeUpDown,
|
||||
ncclPatternCollTreeUp,
|
||||
ncclPatternCollTreeDown,
|
||||
ncclPatternAll
|
||||
ncclPatternCollTreeDown
|
||||
} ncclPattern_t;
|
||||
|
||||
// Used to pass NCCL call information between functions
|
||||
@@ -40,11 +39,6 @@ struct ncclInfo {
|
||||
// Algorithm details
|
||||
int chunkSteps;
|
||||
int sliceSteps;
|
||||
// For alltoallv
|
||||
const size_t *sendcounts;
|
||||
const size_t *sdispls;
|
||||
const size_t *recvcounts;
|
||||
const size_t *rdispls;
|
||||
// Computed later
|
||||
int algorithm;
|
||||
int protocol;
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
#define NCCL_PTR_HOST 0x1
|
||||
#define NCCL_PTR_CUDA 0x2
|
||||
|
||||
// Maximum number of requests per comm object
|
||||
#define NCCL_NET_MAX_REQUESTS 8
|
||||
|
||||
typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
|
||||
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALL=~0} ncclDebugLogSubSys;
|
||||
|
||||
@@ -29,9 +32,9 @@ typedef struct {
|
||||
int speed; // Port speed in Mbps.
|
||||
int port; // Port number.
|
||||
int maxComms; // Maximum number of comms we can create
|
||||
}ncclNetProperties_v3_t;
|
||||
}ncclNetProperties_v4_t;
|
||||
|
||||
typedef ncclNetProperties_v3_t ncclNetProperties_t;
|
||||
typedef ncclNetProperties_v4_t ncclNetProperties_t;
|
||||
|
||||
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
@@ -41,7 +44,7 @@ typedef struct {
|
||||
// Return the number of adapters.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v3_t* props);
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create a connection.
|
||||
@@ -62,7 +65,7 @@ typedef struct {
|
||||
ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*flush)(void* recvComm, void* data, int size, void* mhandle);
|
||||
ncclResult_t (*iflush)(void* recvComm, void* data, int size, void* mhandle, void** request);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* size);
|
||||
@@ -70,11 +73,11 @@ typedef struct {
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v3_t;
|
||||
} ncclNet_v4_t;
|
||||
|
||||
typedef ncclNet_v3_t ncclNet_t;
|
||||
typedef ncclNet_v4_t ncclNet_t;
|
||||
|
||||
#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v3
|
||||
#define NCCL_PLUGIN_SYMBOL ncclNetPlugin_v4
|
||||
|
||||
typedef struct {
|
||||
// Name of the collective network (mainly for logs)
|
||||
@@ -85,7 +88,7 @@ typedef struct {
|
||||
// If ndev returns 0, all other functions might be set to NULL.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v3_t* props);
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create connections.
|
||||
@@ -105,17 +108,17 @@ typedef struct {
|
||||
ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*flush)(void* collComm, void* data, int size, void* mhandle);
|
||||
ncclResult_t (*iflush)(void* collComm, void* data, int size, void* mhandle, void** request);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* size);
|
||||
// Close and free collective comm objects
|
||||
ncclResult_t (*closeColl)(void* collComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclCollNet_v3_t;
|
||||
} ncclCollNet_v4_t;
|
||||
|
||||
typedef ncclCollNet_v3_t ncclCollNet_t;
|
||||
typedef ncclCollNet_v4_t ncclCollNet_t;
|
||||
|
||||
#define NCCL_COLLNET_PLUGIN_SYMBOL ncclCollNetPlugin_v3
|
||||
#define NCCL_COLLNET_PLUGIN_SYMBOL ncclCollNetPlugin_v4
|
||||
|
||||
#endif // end include guard
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Modifications Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
@@ -25,7 +25,7 @@ static ncclResult_t ncclNetRegMr(void* comm, void* data, int size, int type, voi
|
||||
static ncclResult_t ncclNetDeregMr(void* comm, void* mhandle) { NCCLCHECK(ncclNet->deregMr(comm, mhandle)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetIsend(void* sendComm, void* data, int size, void* mhandle, void** request) { NCCLCHECK(ncclNet->isend(sendComm, data, size, mhandle, request)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetIrecv(void* recvComm, void* data, int size, void* mhandle, void** request) { NCCLCHECK(ncclNet->irecv(recvComm, data, size, mhandle, request)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetFlush(void* recvComm, void* data, int size, void* mhandle) { NCCLCHECK(ncclNet->flush(recvComm, data, size, mhandle)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetIflush(void* recvComm, void* data, int size, void* mhandle, void** request) { NCCLCHECK(ncclNet->iflush(recvComm, data, size, mhandle, request)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetTest(void* request, int* done, int* size) { NCCLCHECK(ncclNet->test(request, done, size)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetCloseSend(void* sendComm) { NCCLCHECK(ncclNet->closeSend(sendComm)); return ncclSuccess; }
|
||||
static ncclResult_t ncclNetCloseRecv(void* recvComm) { NCCLCHECK(ncclNet->closeRecv(recvComm)); return ncclSuccess; }
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
@@ -45,14 +45,6 @@ static ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index)
|
||||
NVMLCHECK(nvmlDeviceGetIndex(device, index));
|
||||
return ncclSuccess;
|
||||
}
|
||||
static ncclResult_t wrapNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device) {
|
||||
NVMLCHECK(nvmlDeviceGetHandleByIndex(index,device));
|
||||
return ncclSuccess;
|
||||
}
|
||||
static ncclResult_t wrapNvmlDeviceGetHandleByPciInfo(nvmlDevice_t device, nvmlPciInfo_t* pci) {
|
||||
NVMLCHECK(nvmlDeviceGetPciInfo(device, pci));
|
||||
return ncclSuccess;
|
||||
}
|
||||
static ncclResult_t wrapNvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t *isActive) {
|
||||
NVMLCHECK(nvmlDeviceGetNvLinkState(device, link, isActive));
|
||||
return ncclSuccess;
|
||||
@@ -66,10 +58,6 @@ static ncclResult_t wrapNvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsig
|
||||
NVMLCHECK(nvmlDeviceGetNvLinkCapability(device, link, capability, capResult));
|
||||
return ncclSuccess;
|
||||
}
|
||||
static ncclResult_t wrapNvmlDeviceGetMinorNumber(nvmlDevice_t device, unsigned int* minorNumber) {
|
||||
NVMLCHECK(nvmlDeviceGetMinorNumber(device, minorNumber));
|
||||
return ncclSuccess;
|
||||
}
|
||||
static ncclResult_t wrapNvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int* major, int* minor) {
|
||||
NVMLCHECK(nvmlDeviceGetCudaComputeCapability(device, major, minor));
|
||||
return ncclSuccess;
|
||||
@@ -150,12 +138,10 @@ ncclResult_t wrapNvmlShutdown(void);
|
||||
ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_t* device);
|
||||
ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
|
||||
ncclResult_t wrapNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
|
||||
ncclResult_t wrapNvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t* pci);
|
||||
ncclResult_t wrapNvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t *isActive);
|
||||
ncclResult_t wrapNvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link, nvmlPciInfo_t *pci);
|
||||
ncclResult_t wrapNvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsigned int link,
|
||||
nvmlNvLinkCapability_t capability, unsigned int *capResult);
|
||||
ncclResult_t wrapNvmlDeviceGetMinorNumber(nvmlDevice_t device, unsigned int* minorNumber);
|
||||
ncclResult_t wrapNvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int* major, int* minor);
|
||||
|
||||
#endif // NVML_DIRECT
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
|
||||
#ifndef NCCL_NVTX_H_
|
||||
#define NCCL_NVTX_H_
|
||||
|
||||
#include "nvtx3.hpp"
|
||||
|
||||
struct nccl_domain{static constexpr char const* name{"NCCL"};};
|
||||
|
||||
#endif
|
||||
Diff onderdrukt omdat het te groot bestand
Laad Diff
Diff onderdrukt omdat het te groot bestand
Laad Diff
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#include "nvToolsExt.h"
|
||||
|
||||
#include "cuda.h"
|
||||
|
||||
#ifndef NVTOOLSEXT_CUDA_V3
|
||||
#define NVTOOLSEXT_CUDA_V3
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* ========================================================================= */
|
||||
/** \name Functions for CUDA Resource Naming
|
||||
*/
|
||||
/** \addtogroup RESOURCE_NAMING
|
||||
* \section RESOURCE_NAMING_CUDA CUDA Resource Naming
|
||||
*
|
||||
* This section covers the API functions that allow to annotate CUDA resources
|
||||
* with user-provided names.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* \cond SHOW_HIDDEN
|
||||
* \brief Used to build a non-colliding value for resource types separated class
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
#define NVTX_RESOURCE_CLASS_CUDA 4
|
||||
/** \endcond */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Resource types for CUDA
|
||||
*/
|
||||
typedef enum nvtxResourceCUDAType_t
|
||||
{
|
||||
NVTX_RESOURCE_TYPE_CUDA_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDA, 1), /* CUdevice */
|
||||
NVTX_RESOURCE_TYPE_CUDA_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 2), /* CUcontext */
|
||||
NVTX_RESOURCE_TYPE_CUDA_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDA, 3), /* CUstream */
|
||||
NVTX_RESOURCE_TYPE_CUDA_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDA, 4), /* CUevent */
|
||||
} nvtxResourceCUDAType_t;
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA device.
|
||||
*
|
||||
* Allows the user to associate a CUDA device with a user-provided name.
|
||||
*
|
||||
* \param device - The handle of the CUDA device to name.
|
||||
* \param name - The name of the CUDA device.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA context.
|
||||
*
|
||||
* Allows the user to associate a CUDA context with a user-provided name.
|
||||
*
|
||||
* \param context - The handle of the CUDA context to name.
|
||||
* \param name - The name of the CUDA context.
|
||||
*
|
||||
* \par Example:
|
||||
* \code
|
||||
* CUresult status = cuCtxCreate( &cuContext, 0, cuDevice );
|
||||
* if ( CUDA_SUCCESS != status )
|
||||
* goto Error;
|
||||
* nvtxNameCuContext(cuContext, "CTX_NAME");
|
||||
* \endcode
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA stream.
|
||||
*
|
||||
* Allows the user to associate a CUDA stream with a user-provided name.
|
||||
*
|
||||
* \param stream - The handle of the CUDA stream to name.
|
||||
* \param name - The name of the CUDA stream.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA event.
|
||||
*
|
||||
* Allows the user to associate a CUDA event with a user-provided name.
|
||||
*
|
||||
* \param event - The handle of the CUDA event to name.
|
||||
* \param name - The name of the CUDA event.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/** @} */ /* END RESOURCE_NAMING */
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef UNICODE
|
||||
#define nvtxNameCuDevice nvtxNameCuDeviceW
|
||||
#define nvtxNameCuContext nvtxNameCuContextW
|
||||
#define nvtxNameCuStream nvtxNameCuStreamW
|
||||
#define nvtxNameCuEvent nvtxNameCuEventW
|
||||
#else
|
||||
#define nvtxNameCuDevice nvtxNameCuDeviceA
|
||||
#define nvtxNameCuContext nvtxNameCuContextA
|
||||
#define nvtxNameCuStream nvtxNameCuStreamA
|
||||
#define nvtxNameCuEvent nvtxNameCuEventA
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifndef NVTX_NO_IMPL
|
||||
#define NVTX_IMPL_GUARD_CUDA /* Ensure other headers cannot included directly */
|
||||
#include "nvtxDetail/nvtxImplCuda_v3.h"
|
||||
#undef NVTX_IMPL_GUARD_CUDA
|
||||
#endif /*NVTX_NO_IMPL*/
|
||||
|
||||
#endif /* NVTOOLSEXT_CUDA_V3 */
|
||||
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#include "nvToolsExt.h"
|
||||
|
||||
#include "cuda.h"
|
||||
#include "driver_types.h"
|
||||
|
||||
#ifndef NVTOOLSEXT_CUDART_V3
|
||||
#define NVTOOLSEXT_CUDART_V3
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* ========================================================================= */
|
||||
/** \name Functions for CUDA Resource Naming
|
||||
*/
|
||||
/** \addtogroup RESOURCE_NAMING
|
||||
* \section RESOURCE_NAMING_CUDART CUDA Runtime Resource Naming
|
||||
*
|
||||
* This section covers the API functions that allow to annotate CUDA resources
|
||||
* with user-provided names.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* \cond SHOW_HIDDEN
|
||||
* \brief Used to build a non-colliding value for resource types separated class
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
#define NVTX_RESOURCE_CLASS_CUDART 5
|
||||
/** \endcond */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Resource types for CUDART
|
||||
*/
|
||||
typedef enum nvtxResourceCUDARTType_t
|
||||
{
|
||||
NVTX_RESOURCE_TYPE_CUDART_DEVICE = NVTX_RESOURCE_MAKE_TYPE(CUDART, 0), /* int device */
|
||||
NVTX_RESOURCE_TYPE_CUDART_STREAM = NVTX_RESOURCE_MAKE_TYPE(CUDART, 1), /* cudaStream_t */
|
||||
NVTX_RESOURCE_TYPE_CUDART_EVENT = NVTX_RESOURCE_MAKE_TYPE(CUDART, 2), /* cudaEvent_t */
|
||||
} nvtxResourceCUDARTType_t;
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA device.
|
||||
*
|
||||
* Allows the user to associate a CUDA device with a user-provided name.
|
||||
*
|
||||
* \param device - The id of the CUDA device to name.
|
||||
* \param name - The name of the CUDA device.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA stream.
|
||||
*
|
||||
* Allows the user to associate a CUDA stream with a user-provided name.
|
||||
*
|
||||
* \param stream - The handle of the CUDA stream to name.
|
||||
* \param name - The name of the CUDA stream.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates a CUDA event.
|
||||
*
|
||||
* Allows the user to associate a CUDA event with a user-provided name.
|
||||
*
|
||||
* \param event - The handle of the CUDA event to name.
|
||||
* \param name - The name of the CUDA event.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/** @} */ /* END RESOURCE_NAMING */
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef UNICODE
|
||||
#define nvtxNameCudaDevice nvtxNameCudaDeviceW
|
||||
#define nvtxNameCudaStream nvtxNameCudaStreamW
|
||||
#define nvtxNameCudaEvent nvtxNameCudaEventW
|
||||
#else
|
||||
#define nvtxNameCudaDevice nvtxNameCudaDeviceA
|
||||
#define nvtxNameCudaStream nvtxNameCudaStreamA
|
||||
#define nvtxNameCudaEvent nvtxNameCudaEventA
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifndef NVTX_NO_IMPL
|
||||
#define NVTX_IMPL_GUARD_CUDART /* Ensure other headers cannot included directly */
|
||||
#include "nvtxDetail/nvtxImplCudaRt_v3.h"
|
||||
#undef NVTX_IMPL_GUARD_CUDART
|
||||
#endif /*NVTX_NO_IMPL*/
|
||||
|
||||
#endif /* NVTOOLSEXT_CUDART_V3 */
|
||||
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#include "nvToolsExt.h"
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifndef NVTOOLSEXT_OPENCL_V3
|
||||
#define NVTOOLSEXT_OPENCL_V3
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* ========================================================================= */
|
||||
/** \name Functions for OpenCL Resource Naming
|
||||
*/
|
||||
/** \addtogroup RESOURCE_NAMING
|
||||
* \section RESOURCE_NAMING_OPENCL OpenCL Resource Naming
|
||||
*
|
||||
* This section covers the API functions that allow to annotate OpenCL resources
|
||||
* with user-provided names.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* \cond SHOW_HIDDEN
|
||||
* \brief Used to build a non-colliding value for resource types separated class
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
#define NVTX_RESOURCE_CLASS_OPENCL 6
|
||||
/** \endcond */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Resource types for OpenCL
|
||||
*/
|
||||
typedef enum nvtxResourceOpenCLType_t
|
||||
{
|
||||
NVTX_RESOURCE_TYPE_OPENCL_DEVICE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 1),
|
||||
NVTX_RESOURCE_TYPE_OPENCL_CONTEXT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 2),
|
||||
NVTX_RESOURCE_TYPE_OPENCL_COMMANDQUEUE = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 3),
|
||||
NVTX_RESOURCE_TYPE_OPENCL_MEMOBJECT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 4),
|
||||
NVTX_RESOURCE_TYPE_OPENCL_SAMPLER = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 5),
|
||||
NVTX_RESOURCE_TYPE_OPENCL_PROGRAM = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 6),
|
||||
NVTX_RESOURCE_TYPE_OPENCL_EVENT = NVTX_RESOURCE_MAKE_TYPE(OPENCL, 7),
|
||||
} nvtxResourceOpenCLType_t;
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL device.
|
||||
*
|
||||
* Allows to associate an OpenCL device with a user-provided name.
|
||||
*
|
||||
* \param device - The handle of the OpenCL device to name.
|
||||
* \param name - The name of the OpenCL device.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL context.
|
||||
*
|
||||
* Allows to associate an OpenCL context with a user-provided name.
|
||||
*
|
||||
* \param context - The handle of the OpenCL context to name.
|
||||
* \param name - The name of the OpenCL context.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL command queue.
|
||||
*
|
||||
* Allows to associate an OpenCL command queue with a user-provided name.
|
||||
*
|
||||
* \param command_queue - The handle of the OpenCL command queue to name.
|
||||
* \param name - The name of the OpenCL command queue.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL memory object.
|
||||
*
|
||||
* Allows to associate an OpenCL memory object with a user-provided name.
|
||||
*
|
||||
* \param memobj - The handle of the OpenCL memory object to name.
|
||||
* \param name - The name of the OpenCL memory object.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL sampler.
|
||||
*
|
||||
* Allows to associate an OpenCL sampler with a user-provided name.
|
||||
*
|
||||
* \param sampler - The handle of the OpenCL sampler to name.
|
||||
* \param name - The name of the OpenCL sampler.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL program.
|
||||
*
|
||||
* Allows to associate an OpenCL program with a user-provided name.
|
||||
*
|
||||
* \param program - The handle of the OpenCL program to name.
|
||||
* \param name - The name of the OpenCL program.
|
||||
*
|
||||
* \code
|
||||
* cpProgram = clCreateProgramWithSource(cxGPUContext, 1,
|
||||
* (const char **) &cSourceCL, &program_length, &ciErrNum);
|
||||
* shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
|
||||
* nvtxNameClProgram(cpProgram, L"PROGRAM_NAME");
|
||||
* \endcode
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Annotates an OpenCL event.
|
||||
*
|
||||
* Allows to associate an OpenCL event with a user-provided name.
|
||||
*
|
||||
* \param evnt - The handle of the OpenCL event to name.
|
||||
* \param name - The name of the OpenCL event.
|
||||
*
|
||||
* \version \NVTX_VERSION_1
|
||||
* @{ */
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name);
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name);
|
||||
/** @} */
|
||||
|
||||
/** @} */ /* END RESOURCE_NAMING */
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef UNICODE
|
||||
#define nvtxNameClDevice nvtxNameClDeviceW
|
||||
#define nvtxNameClContext nvtxNameClContextW
|
||||
#define nvtxNameClCommandQueue nvtxNameClCommandQueueW
|
||||
#define nvtxNameClMemObject nvtxNameClMemObjectW
|
||||
#define nvtxNameClSampler nvtxNameClSamplerW
|
||||
#define nvtxNameClProgram nvtxNameClProgramW
|
||||
#define nvtxNameClEvent nvtxNameClEventW
|
||||
#else
|
||||
#define nvtxNameClDevice nvtxNameClDeviceA
|
||||
#define nvtxNameClContext nvtxNameClContextA
|
||||
#define nvtxNameClCommandQueue nvtxNameClCommandQueueA
|
||||
#define nvtxNameClMemObject nvtxNameClMemObjectA
|
||||
#define nvtxNameClSampler nvtxNameClSamplerA
|
||||
#define nvtxNameClProgram nvtxNameClProgramA
|
||||
#define nvtxNameClEvent nvtxNameClEventA
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifndef NVTX_NO_IMPL
|
||||
#define NVTX_IMPL_GUARD_OPENCL /* Ensure other headers cannot included directly */
|
||||
#include "nvtxDetail/nvtxImplOpenCL_v3.h"
|
||||
#undef NVTX_IMPL_GUARD_OPENCL
|
||||
#endif /*NVTX_NO_IMPL*/
|
||||
|
||||
#endif /* NVTOOLSEXT_OPENCL_V3 */
|
||||
@@ -0,0 +1,382 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#include "nvToolsExt.h"
|
||||
|
||||
#ifndef NVTOOLSEXT_SYNC_V3
|
||||
#define NVTOOLSEXT_SYNC_V3
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/* \cond SHOW_HIDDEN
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
#define NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE ( (uint16_t)( sizeof(nvtxSyncUserAttributes_v0) ) )
|
||||
/** \endcond */
|
||||
|
||||
|
||||
/**
|
||||
* \page PAGE_SYNCHRONIZATION Synchronization
|
||||
*
|
||||
* This section covers a subset of the API that allow users to track additional
|
||||
* synchronization details of their application. Naming OS synchronization primitives
|
||||
* may allow users to better understand the data collected by traced synchronization
|
||||
* APIs. Additionally, a user defined synchronization object can allow the users to
|
||||
* to tell the tools when the user is building their own synchronization system
|
||||
* that do not rely on the OS to provide behaviors and instead use techniques like
|
||||
* atomic operations and spinlocks.
|
||||
*
|
||||
* See module \ref SYNCHRONIZATION for details.
|
||||
*
|
||||
* \par Example:
|
||||
* \code
|
||||
* class MyMutex
|
||||
* {
|
||||
* volatile long bLocked;
|
||||
* nvtxSyncUser_t hSync;
|
||||
* public:
|
||||
* MyMutex(const char* name, nvtxDomainHandle_t d){
|
||||
* bLocked = 0;
|
||||
*
|
||||
* nvtxSyncUserAttributes_t attribs = { 0 };
|
||||
* attribs.version = NVTX_VERSION;
|
||||
* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
|
||||
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
|
||||
* attribs.message.ascii = name;
|
||||
* hSync = nvtxDomainSyncUserCreate(d, &attribs);
|
||||
* }
|
||||
*
|
||||
* ~MyMutex() {
|
||||
* nvtxDomainSyncUserDestroy(hSync);
|
||||
* }
|
||||
*
|
||||
* bool Lock() {
|
||||
* nvtxDomainSyncUserAcquireStart(hSync);
|
||||
* bool acquired = __sync_bool_compare_and_swap(&bLocked, 0, 1);//atomic compiler intrinsic
|
||||
|
||||
* if (acquired) {
|
||||
* nvtxDomainSyncUserAcquireSuccess(hSync);
|
||||
* }
|
||||
* else {
|
||||
* nvtxDomainSyncUserAcquireFailed(hSync);
|
||||
* }
|
||||
* return acquired;
|
||||
* }
|
||||
|
||||
* void Unlock() {
|
||||
* nvtxDomainSyncUserReleasing(hSync);
|
||||
* bLocked = false;
|
||||
* }
|
||||
* };
|
||||
* \endcode
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/* \cond SHOW_HIDDEN
|
||||
* \brief Used to build a non-colliding value for resource types separated class
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
#define NVTX_RESOURCE_CLASS_SYNC_OS 2 /**< Synchronization objects that are OS specific. */
|
||||
#define NVTX_RESOURCE_CLASS_SYNC_PTHREAD 3 /**< Synchronization objects that are from the POSIX Threads API (pthread)*/
|
||||
/** \endcond */
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \defgroup SYNCHRONIZATION Synchronization
|
||||
* See page \ref PAGE_SYNCHRONIZATION.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Resource type values for OSs with POSIX Thread API support
|
||||
*/
|
||||
typedef enum nvtxResourceSyncPosixThreadType_t
|
||||
{
|
||||
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 1), /* pthread_mutex_t */
|
||||
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_CONDITION = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 2), /* pthread_cond_t */
|
||||
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_RWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 3), /* pthread_rwlock_t */
|
||||
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_BARRIER = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 4), /* pthread_barrier_t */
|
||||
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 5), /* pthread_spinlock_t */
|
||||
NVTX_RESOURCE_TYPE_SYNC_PTHREAD_ONCE = NVTX_RESOURCE_MAKE_TYPE(SYNC_PTHREAD, 6) /* pthread_once_t */
|
||||
} nvtxResourceSyncPosixThreadType_t;
|
||||
|
||||
/** \brief Resource type values for Windows OSs
|
||||
*/
|
||||
typedef enum nvtxResourceSyncWindowsType_t
|
||||
{
|
||||
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
|
||||
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
|
||||
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_EVENT = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
|
||||
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_CRITICAL_SECTION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
|
||||
NVTX_RESOURCE_TYPE_SYNC_WINDOWS_SRWLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5)
|
||||
} nvtxResourceSyncWindowsType_t;
|
||||
|
||||
/** \brief Resource type values for Linux and Linux derived OSs such as Android
|
||||
* \sa
|
||||
* ::nvtxResourceSyncPosixThreadType_t
|
||||
*/
|
||||
typedef enum nvtxResourceSyncLinuxType_t
|
||||
{
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_MUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 1),
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_FUTEX = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 2),
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_SEMAPHORE = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 3),
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_COMPLETION = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 4),
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_SPINLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 5),
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_SEQLOCK = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 6),
|
||||
NVTX_RESOURCE_TYPE_SYNC_LINUX_RCU = NVTX_RESOURCE_MAKE_TYPE(SYNC_OS, 7)
|
||||
} nvtxResourceSyncLinuxType_t;
|
||||
|
||||
/** \brief Resource type values for Android come from Linux.
|
||||
* \sa
|
||||
* ::nvtxResourceSyncLinuxType_t
|
||||
* ::nvtxResourceSyncPosixThreadType_t
|
||||
*/
|
||||
typedef enum nvtxResourceSyncLinuxType_t nvtxResourceSyncAndroidType_t;
|
||||
|
||||
/** \brief User Defined Synchronization Object Handle .
|
||||
* \anchor SYNCUSER_HANDLE_STRUCTURE
|
||||
*
|
||||
* This structure is opaque to the user and is used as a handle to reference
|
||||
* a user defined syncrhonization object. The tools will return a pointer through the API for the application
|
||||
* to hold on it's behalf to reference the string in the future.
|
||||
*
|
||||
*/
|
||||
typedef struct nvtxSyncUser* nvtxSyncUser_t;
|
||||
|
||||
/** \brief User Defined Synchronization Object Attributes Structure.
|
||||
* \anchor USERDEF_SYNC_ATTRIBUTES_STRUCTURE
|
||||
*
|
||||
* This structure is used to describe the attributes of a user defined synchronization
|
||||
* object. The layout of the structure is defined by a specific version of the tools
|
||||
* extension library and can change between different versions of the Tools Extension
|
||||
* library.
|
||||
*
|
||||
* \par Initializing the Attributes
|
||||
*
|
||||
* The caller should always perform the following three tasks when using
|
||||
* attributes:
|
||||
* <ul>
|
||||
* <li>Zero the structure
|
||||
* <li>Set the version field
|
||||
* <li>Set the size field
|
||||
* </ul>
|
||||
*
|
||||
* Zeroing the structure sets all the event attributes types and values
|
||||
* to the default value.
|
||||
*
|
||||
* The version and size field are used by the Tools Extension
|
||||
* implementation to handle multiple versions of the attributes structure.
|
||||
*
|
||||
* It is recommended that the caller use one of the following to methods
|
||||
* to initialize the event attributes structure:
|
||||
*
|
||||
* \par Method 1: Initializing nvtxEventAttributes for future compatibility
|
||||
* \code
|
||||
* nvtxSyncUserAttributes_t attribs = {0};
|
||||
* attribs.version = NVTX_VERSION;
|
||||
* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
|
||||
* \endcode
|
||||
*
|
||||
* \par Method 2: Initializing nvtxSyncUserAttributes_t for a specific version
|
||||
* \code
|
||||
* nvtxSyncUserAttributes_t attribs = {0};
|
||||
* attribs.version = 1;
|
||||
* attribs.size = (uint16_t)(sizeof(nvtxSyncUserAttributes_t));
|
||||
* \endcode
|
||||
*
|
||||
* If the caller uses Method 1 it is critical that the entire binary
|
||||
* layout of the structure be configured to 0 so that all fields
|
||||
* are initialized to the default value.
|
||||
*
|
||||
* The caller should either use both NVTX_VERSION and
|
||||
* NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE (Method 1) or use explicit values
|
||||
* and a versioned type (Method 2). Using a mix of the two methods
|
||||
* will likely cause either source level incompatibility or binary
|
||||
* incompatibility in the future.
|
||||
*
|
||||
* \par Settings Attribute Types and Values
|
||||
*
|
||||
*
|
||||
* \par Example:
|
||||
* \code
|
||||
* // Initialize
|
||||
* nvtxSyncUserAttributes_t attribs = {0};
|
||||
* attribs.version = NVTX_VERSION;
|
||||
* attribs.size = NVTX_SYNCUSER_ATTRIB_STRUCT_SIZE;
|
||||
*
|
||||
* // Configure the Attributes
|
||||
* attribs.messageType = NVTX_MESSAGE_TYPE_ASCII;
|
||||
* attribs.message.ascii = "Example";
|
||||
* \endcode
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
*/
|
||||
typedef struct nvtxSyncUserAttributes_v0
|
||||
{
|
||||
/**
|
||||
* \brief Version flag of the structure.
|
||||
*
|
||||
* Needs to be set to NVTX_VERSION to indicate the version of NVTX APIs
|
||||
* supported in this header file. This can optionally be overridden to
|
||||
* another version of the tools extension library.
|
||||
*/
|
||||
uint16_t version;
|
||||
|
||||
/**
|
||||
* \brief Size of the structure.
|
||||
*
|
||||
* Needs to be set to the size in bytes of the event attribute
|
||||
* structure used to specify the event.
|
||||
*/
|
||||
uint16_t size;
|
||||
|
||||
/** \brief Message type specified in this attribute structure.
|
||||
*
|
||||
* Defines the message format of the attribute structure's \ref nvtxSyncUserAttributes_v0::message
|
||||
* "message" field.
|
||||
*
|
||||
* Default Value is NVTX_MESSAGE_UNKNOWN
|
||||
*/
|
||||
int32_t messageType; /* nvtxMessageType_t */
|
||||
|
||||
/** \brief Message assigned to this attribute structure.
|
||||
*
|
||||
* The text message that is attached to an event.
|
||||
*/
|
||||
nvtxMessageValue_t message;
|
||||
|
||||
} nvtxSyncUserAttributes_v0;
|
||||
|
||||
typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Create a user defined synchronization object
|
||||
* This is used to track non-OS synchronization working with spinlocks and atomics
|
||||
*
|
||||
* \param domain - Domain to own the resource
|
||||
* \param attribs - A structure to assign multiple attributes to the object.
|
||||
*
|
||||
* \return A handle that represents the newly created user defined synchronization object.
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
* ::nvtxDomainSyncUserDestroy
|
||||
* ::nvtxDomainSyncUserAcquireStart
|
||||
* ::nvtxDomainSyncUserAcquireFailed
|
||||
* ::nvtxDomainSyncUserAcquireSuccess
|
||||
* ::nvtxDomainSyncUserReleasing
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Destroy a user defined synchronization object
|
||||
* This is used to track non-OS synchronization working with spinlocks and atomics
|
||||
*
|
||||
* \param handle - A handle to the object to operate on.
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
* ::nvtxDomainSyncUserDestroy
|
||||
* ::nvtxDomainSyncUserAcquireStart
|
||||
* ::nvtxDomainSyncUserAcquireFailed
|
||||
* ::nvtxDomainSyncUserAcquireSuccess
|
||||
* ::nvtxDomainSyncUserReleasing
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Signal to tools that an attempt to acquire a user defined synchronization object
|
||||
*
|
||||
* \param handle - A handle to the object to operate on.
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
* ::nvtxDomainSyncUserDestroy
|
||||
* ::nvtxDomainSyncUserAcquireStart
|
||||
* ::nvtxDomainSyncUserAcquireFailed
|
||||
* ::nvtxDomainSyncUserAcquireSuccess
|
||||
* ::nvtxDomainSyncUserReleasing
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Signal to tools of failure in acquiring a user defined synchronization object
|
||||
* This should be called after \ref nvtxDomainSyncUserAcquireStart
|
||||
*
|
||||
* \param handle - A handle to the object to operate on.
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
* ::nvtxDomainSyncUserDestroy
|
||||
* ::nvtxDomainSyncUserAcquireStart
|
||||
* ::nvtxDomainSyncUserAcquireFailed
|
||||
* ::nvtxDomainSyncUserAcquireSuccess
|
||||
* ::nvtxDomainSyncUserReleasing
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Signal to tools of success in acquiring a user defined synchronization object
|
||||
* This should be called after \ref nvtxDomainSyncUserAcquireStart.
|
||||
*
|
||||
* \param handle - A handle to the object to operate on.
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
* ::nvtxDomainSyncUserDestroy
|
||||
* ::nvtxDomainSyncUserAcquireStart
|
||||
* ::nvtxDomainSyncUserAcquireFailed
|
||||
* ::nvtxDomainSyncUserAcquireSuccess
|
||||
* ::nvtxDomainSyncUserReleasing
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
/** \brief Signal to tools of releasing a reservation on user defined synchronization object
|
||||
* This should be called after \ref nvtxDomainSyncUserAcquireSuccess.
|
||||
*
|
||||
* \param handle - A handle to the object to operate on.
|
||||
*
|
||||
* \sa
|
||||
* ::nvtxDomainSyncUserCreate
|
||||
* ::nvtxDomainSyncUserDestroy
|
||||
* ::nvtxDomainSyncUserAcquireStart
|
||||
* ::nvtxDomainSyncUserAcquireFailed
|
||||
* ::nvtxDomainSyncUserAcquireSuccess
|
||||
* ::nvtxDomainSyncUserReleasing
|
||||
*
|
||||
* \version \NVTX_VERSION_2
|
||||
*/
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle);
|
||||
|
||||
|
||||
/** @} */ /*END defgroup*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifndef NVTX_NO_IMPL
|
||||
#define NVTX_IMPL_GUARD_SYNC /* Ensure other headers cannot included directly */
|
||||
#include "nvtxDetail/nvtxImplSync_v3.h"
|
||||
#undef NVTX_IMPL_GUARD_SYNC
|
||||
#endif /*NVTX_NO_IMPL*/
|
||||
|
||||
#endif /* NVTOOLSEXT_SYNC_V3 */
|
||||
@@ -0,0 +1,438 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
/* ---- Include required platform headers ---- */
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
#else
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
#include <android/api-level.h>
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__CYGWIN__)
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#include <dlfcn.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#endif
|
||||
|
||||
/* ---- Define macros used in this file ---- */
|
||||
|
||||
#define NVTX_INIT_STATE_FRESH 0
|
||||
#define NVTX_INIT_STATE_STARTED 1
|
||||
#define NVTX_INIT_STATE_COMPLETE 2
|
||||
|
||||
#ifdef NVTX_DEBUG_PRINT
|
||||
#ifdef __ANDROID__
|
||||
#include <android/log.h>
|
||||
#define NVTX_ERR(...) __android_log_print(ANDROID_LOG_ERROR, "NVTOOLSEXT", __VA_ARGS__);
|
||||
#define NVTX_INFO(...) __android_log_print(ANDROID_LOG_INFO, "NVTOOLSEXT", __VA_ARGS__);
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#define NVTX_ERR(...) fprintf(stderr, "NVTX_ERROR: " __VA_ARGS__)
|
||||
#define NVTX_INFO(...) fprintf(stderr, "NVTX_INFO: " __VA_ARGS__)
|
||||
#endif
|
||||
#else /* !defined(NVTX_DEBUG_PRINT) */
|
||||
#define NVTX_ERR(...)
|
||||
#define NVTX_INFO(...)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC visibility push(hidden)
|
||||
#endif
|
||||
|
||||
/* ---- Forward declare all functions referenced in globals ---- */
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
|
||||
NvtxCallbackModule module,
|
||||
NvtxFunctionTable* out_table,
|
||||
unsigned int* out_size);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(
|
||||
uint32_t version);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(
|
||||
uint32_t exportTableId);
|
||||
|
||||
#include "nvtxInitDecls.h"
|
||||
|
||||
/* ---- Define all globals ---- */
|
||||
|
||||
typedef struct nvtxGlobals_t
|
||||
{
|
||||
volatile unsigned int initState;
|
||||
NvtxExportTableCallbacks etblCallbacks;
|
||||
NvtxExportTableVersionInfo etblVersionInfo;
|
||||
|
||||
/* Implementation function pointers */
|
||||
nvtxMarkEx_impl_fntype nvtxMarkEx_impl_fnptr;
|
||||
nvtxMarkA_impl_fntype nvtxMarkA_impl_fnptr;
|
||||
nvtxMarkW_impl_fntype nvtxMarkW_impl_fnptr;
|
||||
nvtxRangeStartEx_impl_fntype nvtxRangeStartEx_impl_fnptr;
|
||||
nvtxRangeStartA_impl_fntype nvtxRangeStartA_impl_fnptr;
|
||||
nvtxRangeStartW_impl_fntype nvtxRangeStartW_impl_fnptr;
|
||||
nvtxRangeEnd_impl_fntype nvtxRangeEnd_impl_fnptr;
|
||||
nvtxRangePushEx_impl_fntype nvtxRangePushEx_impl_fnptr;
|
||||
nvtxRangePushA_impl_fntype nvtxRangePushA_impl_fnptr;
|
||||
nvtxRangePushW_impl_fntype nvtxRangePushW_impl_fnptr;
|
||||
nvtxRangePop_impl_fntype nvtxRangePop_impl_fnptr;
|
||||
nvtxNameCategoryA_impl_fntype nvtxNameCategoryA_impl_fnptr;
|
||||
nvtxNameCategoryW_impl_fntype nvtxNameCategoryW_impl_fnptr;
|
||||
nvtxNameOsThreadA_impl_fntype nvtxNameOsThreadA_impl_fnptr;
|
||||
nvtxNameOsThreadW_impl_fntype nvtxNameOsThreadW_impl_fnptr;
|
||||
|
||||
nvtxNameCuDeviceA_fakeimpl_fntype nvtxNameCuDeviceA_impl_fnptr;
|
||||
nvtxNameCuDeviceW_fakeimpl_fntype nvtxNameCuDeviceW_impl_fnptr;
|
||||
nvtxNameCuContextA_fakeimpl_fntype nvtxNameCuContextA_impl_fnptr;
|
||||
nvtxNameCuContextW_fakeimpl_fntype nvtxNameCuContextW_impl_fnptr;
|
||||
nvtxNameCuStreamA_fakeimpl_fntype nvtxNameCuStreamA_impl_fnptr;
|
||||
nvtxNameCuStreamW_fakeimpl_fntype nvtxNameCuStreamW_impl_fnptr;
|
||||
nvtxNameCuEventA_fakeimpl_fntype nvtxNameCuEventA_impl_fnptr;
|
||||
nvtxNameCuEventW_fakeimpl_fntype nvtxNameCuEventW_impl_fnptr;
|
||||
|
||||
nvtxNameClDeviceA_fakeimpl_fntype nvtxNameClDeviceA_impl_fnptr;
|
||||
nvtxNameClDeviceW_fakeimpl_fntype nvtxNameClDeviceW_impl_fnptr;
|
||||
nvtxNameClContextA_fakeimpl_fntype nvtxNameClContextA_impl_fnptr;
|
||||
nvtxNameClContextW_fakeimpl_fntype nvtxNameClContextW_impl_fnptr;
|
||||
nvtxNameClCommandQueueA_fakeimpl_fntype nvtxNameClCommandQueueA_impl_fnptr;
|
||||
nvtxNameClCommandQueueW_fakeimpl_fntype nvtxNameClCommandQueueW_impl_fnptr;
|
||||
nvtxNameClMemObjectA_fakeimpl_fntype nvtxNameClMemObjectA_impl_fnptr;
|
||||
nvtxNameClMemObjectW_fakeimpl_fntype nvtxNameClMemObjectW_impl_fnptr;
|
||||
nvtxNameClSamplerA_fakeimpl_fntype nvtxNameClSamplerA_impl_fnptr;
|
||||
nvtxNameClSamplerW_fakeimpl_fntype nvtxNameClSamplerW_impl_fnptr;
|
||||
nvtxNameClProgramA_fakeimpl_fntype nvtxNameClProgramA_impl_fnptr;
|
||||
nvtxNameClProgramW_fakeimpl_fntype nvtxNameClProgramW_impl_fnptr;
|
||||
nvtxNameClEventA_fakeimpl_fntype nvtxNameClEventA_impl_fnptr;
|
||||
nvtxNameClEventW_fakeimpl_fntype nvtxNameClEventW_impl_fnptr;
|
||||
|
||||
nvtxNameCudaDeviceA_impl_fntype nvtxNameCudaDeviceA_impl_fnptr;
|
||||
nvtxNameCudaDeviceW_impl_fntype nvtxNameCudaDeviceW_impl_fnptr;
|
||||
nvtxNameCudaStreamA_fakeimpl_fntype nvtxNameCudaStreamA_impl_fnptr;
|
||||
nvtxNameCudaStreamW_fakeimpl_fntype nvtxNameCudaStreamW_impl_fnptr;
|
||||
nvtxNameCudaEventA_fakeimpl_fntype nvtxNameCudaEventA_impl_fnptr;
|
||||
nvtxNameCudaEventW_fakeimpl_fntype nvtxNameCudaEventW_impl_fnptr;
|
||||
|
||||
nvtxDomainMarkEx_impl_fntype nvtxDomainMarkEx_impl_fnptr;
|
||||
nvtxDomainRangeStartEx_impl_fntype nvtxDomainRangeStartEx_impl_fnptr;
|
||||
nvtxDomainRangeEnd_impl_fntype nvtxDomainRangeEnd_impl_fnptr;
|
||||
nvtxDomainRangePushEx_impl_fntype nvtxDomainRangePushEx_impl_fnptr;
|
||||
nvtxDomainRangePop_impl_fntype nvtxDomainRangePop_impl_fnptr;
|
||||
nvtxDomainResourceCreate_impl_fntype nvtxDomainResourceCreate_impl_fnptr;
|
||||
nvtxDomainResourceDestroy_impl_fntype nvtxDomainResourceDestroy_impl_fnptr;
|
||||
nvtxDomainNameCategoryA_impl_fntype nvtxDomainNameCategoryA_impl_fnptr;
|
||||
nvtxDomainNameCategoryW_impl_fntype nvtxDomainNameCategoryW_impl_fnptr;
|
||||
nvtxDomainRegisterStringA_impl_fntype nvtxDomainRegisterStringA_impl_fnptr;
|
||||
nvtxDomainRegisterStringW_impl_fntype nvtxDomainRegisterStringW_impl_fnptr;
|
||||
nvtxDomainCreateA_impl_fntype nvtxDomainCreateA_impl_fnptr;
|
||||
nvtxDomainCreateW_impl_fntype nvtxDomainCreateW_impl_fnptr;
|
||||
nvtxDomainDestroy_impl_fntype nvtxDomainDestroy_impl_fnptr;
|
||||
nvtxInitialize_impl_fntype nvtxInitialize_impl_fnptr;
|
||||
|
||||
nvtxDomainSyncUserCreate_impl_fntype nvtxDomainSyncUserCreate_impl_fnptr;
|
||||
nvtxDomainSyncUserDestroy_impl_fntype nvtxDomainSyncUserDestroy_impl_fnptr;
|
||||
nvtxDomainSyncUserAcquireStart_impl_fntype nvtxDomainSyncUserAcquireStart_impl_fnptr;
|
||||
nvtxDomainSyncUserAcquireFailed_impl_fntype nvtxDomainSyncUserAcquireFailed_impl_fnptr;
|
||||
nvtxDomainSyncUserAcquireSuccess_impl_fntype nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
|
||||
nvtxDomainSyncUserReleasing_impl_fntype nvtxDomainSyncUserReleasing_impl_fnptr;
|
||||
|
||||
/* Tables of function pointers -- Extra null added to the end to ensure
|
||||
* a crash instead of silent corruption if a tool reads off the end. */
|
||||
NvtxFunctionPointer* functionTable_CORE [NVTX_CBID_CORE_SIZE + 1];
|
||||
NvtxFunctionPointer* functionTable_CUDA [NVTX_CBID_CUDA_SIZE + 1];
|
||||
NvtxFunctionPointer* functionTable_OPENCL[NVTX_CBID_OPENCL_SIZE + 1];
|
||||
NvtxFunctionPointer* functionTable_CUDART[NVTX_CBID_CUDART_SIZE + 1];
|
||||
NvtxFunctionPointer* functionTable_CORE2 [NVTX_CBID_CORE2_SIZE + 1];
|
||||
NvtxFunctionPointer* functionTable_SYNC [NVTX_CBID_SYNC_SIZE + 1];
|
||||
} nvtxGlobals_t;
|
||||
|
||||
NVTX_LINKONCE_DEFINE_GLOBAL nvtxGlobals_t NVTX_VERSIONED_IDENTIFIER(nvtxGlobals) =
|
||||
{
|
||||
NVTX_INIT_STATE_FRESH,
|
||||
|
||||
{
|
||||
sizeof(NvtxExportTableCallbacks),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)
|
||||
},
|
||||
{
|
||||
sizeof(NvtxExportTableVersionInfo),
|
||||
NVTX_VERSION,
|
||||
0,
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)
|
||||
},
|
||||
|
||||
/* Implementation function pointers */
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init),
|
||||
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init),
|
||||
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init),
|
||||
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init),
|
||||
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init),
|
||||
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init),
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init),
|
||||
|
||||
/* Tables of function pointers */
|
||||
{
|
||||
0,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr,
|
||||
0
|
||||
},
|
||||
{
|
||||
0,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr,
|
||||
0
|
||||
},
|
||||
{
|
||||
0,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr,
|
||||
0
|
||||
},
|
||||
{
|
||||
0,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr,
|
||||
0
|
||||
},
|
||||
{
|
||||
0,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr,
|
||||
0
|
||||
},
|
||||
{
|
||||
0,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr,
|
||||
(NvtxFunctionPointer*)&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr,
|
||||
0
|
||||
}
|
||||
};
|
||||
|
||||
/* ---- Define static inline implementations of core API functions ---- */
|
||||
|
||||
#include "nvtxImplCore.h"
|
||||
|
||||
/* ---- Define implementations of export table functions ---- */
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiGetModuleFunctionTable)(
|
||||
NvtxCallbackModule module,
|
||||
NvtxFunctionTable* out_table,
|
||||
unsigned int* out_size)
|
||||
{
|
||||
unsigned int bytes = 0;
|
||||
NvtxFunctionTable table = (NvtxFunctionTable)0;
|
||||
|
||||
switch (module)
|
||||
{
|
||||
case NVTX_CB_MODULE_CORE:
|
||||
table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE;
|
||||
bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE);
|
||||
break;
|
||||
case NVTX_CB_MODULE_CUDA:
|
||||
table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA;
|
||||
bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDA);
|
||||
break;
|
||||
case NVTX_CB_MODULE_OPENCL:
|
||||
table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL;
|
||||
bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_OPENCL);
|
||||
break;
|
||||
case NVTX_CB_MODULE_CUDART:
|
||||
table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART;
|
||||
bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CUDART);
|
||||
break;
|
||||
case NVTX_CB_MODULE_CORE2:
|
||||
table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2;
|
||||
bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_CORE2);
|
||||
break;
|
||||
case NVTX_CB_MODULE_SYNC:
|
||||
table = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC;
|
||||
bytes = (unsigned int)sizeof(NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).functionTable_SYNC);
|
||||
break;
|
||||
default: return 0;
|
||||
}
|
||||
|
||||
if (out_size)
|
||||
*out_size = (bytes / (unsigned int)sizeof(NvtxFunctionPointer*)) - 1;
|
||||
|
||||
if (out_table)
|
||||
*out_table = table;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION const void* NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable)(uint32_t exportTableId)
|
||||
{
|
||||
switch (exportTableId)
|
||||
{
|
||||
case NVTX_ETID_CALLBACKS: return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblCallbacks;
|
||||
case NVTX_ETID_VERSIONINFO: return &NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).etblVersionInfo;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxEtiSetInjectionNvtxVersion)(uint32_t version)
|
||||
{
|
||||
/* Reserved for custom implementations to resolve problems with tools */
|
||||
(void)version;
|
||||
}
|
||||
|
||||
/* ---- Define implementations of init versions of all API functions ---- */
|
||||
|
||||
#include "nvtxInitDefs.h"
|
||||
|
||||
/* ---- Define implementations of initialization functions ---- */
|
||||
|
||||
#include "nvtxInit.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC visibility pop
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@@ -0,0 +1,307 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxMarkEx(const nvtxEventAttributes_t* eventAttrib)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(eventAttrib);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxMarkA(const char* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxMarkA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(message);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxMarkW(const wchar_t* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxMarkW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(message);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartEx(const nvtxEventAttributes_t* eventAttrib)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(eventAttrib);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxRangeId_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartA(const char* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangeStartA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(message);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxRangeId_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxRangeStartW(const wchar_t* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangeStartW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(message);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxRangeId_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxRangeEnd(nvtxRangeId_t id)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(id);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC int NVTX_API nvtxRangePushEx(const nvtxEventAttributes_t* eventAttrib)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(eventAttrib);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (int)NVTX_NO_PUSH_POP_TRACKING;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC int NVTX_API nvtxRangePushA(const char* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangePushA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(message);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (int)NVTX_NO_PUSH_POP_TRACKING;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC int NVTX_API nvtxRangePushW(const wchar_t* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangePushW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(message);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (int)NVTX_NO_PUSH_POP_TRACKING;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC int NVTX_API nvtxRangePop(void)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)();
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (int)NVTX_NO_PUSH_POP_TRACKING;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCategoryA(uint32_t category, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(category, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCategoryW(uint32_t category, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(category, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadA(uint32_t threadId, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameOsThreadA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(threadId, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameOsThreadW(uint32_t threadId, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameOsThreadW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(threadId, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainMarkEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainMarkEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(domain, eventAttrib);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxRangeId_t NVTX_API nvtxDomainRangeStartEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainRangeStartEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain, eventAttrib);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxRangeId_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainRangeEnd(nvtxDomainHandle_t domain, nvtxRangeId_t id)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainRangeEnd_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(domain, id);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC int NVTX_API nvtxDomainRangePushEx(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainRangePushEx_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain, eventAttrib);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (int)NVTX_NO_PUSH_POP_TRACKING;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC int NVTX_API nvtxDomainRangePop(nvtxDomainHandle_t domain)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainRangePop_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (int)NVTX_NO_PUSH_POP_TRACKING;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxResourceHandle_t NVTX_API nvtxDomainResourceCreate(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainResourceCreate_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain, attribs);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxResourceHandle_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainResourceDestroy(nvtxResourceHandle_t resource)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainResourceDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(resource);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryA(nvtxDomainHandle_t domain, uint32_t category, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainNameCategoryA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(domain, category, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainNameCategoryW(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainNameCategoryW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(domain, category, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringA(nvtxDomainHandle_t domain, const char* string)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainRegisterStringA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain, string);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxStringHandle_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxStringHandle_t NVTX_API nvtxDomainRegisterStringW(nvtxDomainHandle_t domain, const wchar_t* string)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainRegisterStringW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain, string);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxStringHandle_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateA(const char* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainCreateA_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(message);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxDomainHandle_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC nvtxDomainHandle_t NVTX_API nvtxDomainCreateW(const wchar_t* message)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainCreateW_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(message);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxDomainHandle_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainDestroy(nvtxDomainHandle_t domain)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainDestroy_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(domain);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxInitialize(const void* reserved)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxInitialize_impl_fntype local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(reserved);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD_CUDART
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExtCudaRt.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaStreamA_impl_fntype)(cudaStream_t stream, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaStreamW_impl_fntype)(cudaStream_t stream, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaEventA_impl_fntype)(cudaEvent_t event, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaEventW_impl_fntype)(cudaEvent_t event, const wchar_t* name);
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceA(int device, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCudaDeviceA_impl_fntype local = (nvtxNameCudaDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(device, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaDeviceW(int device, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCudaDeviceW_impl_fntype local = (nvtxNameCudaDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(device, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamA(cudaStream_t stream, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCudaStreamA_impl_fntype local = (nvtxNameCudaStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(stream, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaStreamW(cudaStream_t stream, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCudaStreamW_impl_fntype local = (nvtxNameCudaStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(stream, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventA(cudaEvent_t event, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCudaEventA_impl_fntype local = (nvtxNameCudaEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(event, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCudaEventW(cudaEvent_t event, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCudaEventW_impl_fntype local = (nvtxNameCudaEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(event, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD_CUDA
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
typedef void (NVTX_API * nvtxNameCuDeviceA_impl_fntype)(CUdevice device, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuDeviceW_impl_fntype)(CUdevice device, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCuContextA_impl_fntype)(CUcontext context, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuContextW_impl_fntype)(CUcontext context, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCuStreamA_impl_fntype)(CUstream stream, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuStreamW_impl_fntype)(CUstream stream, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCuEventA_impl_fntype)(CUevent event, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuEventW_impl_fntype)(CUevent event, const wchar_t* name);
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceA(CUdevice device, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuDeviceA_impl_fntype local = (nvtxNameCuDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(device, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuDeviceW(CUdevice device, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuDeviceW_impl_fntype local = (nvtxNameCuDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(device, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextA(CUcontext context, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuContextA_impl_fntype local = (nvtxNameCuContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(context, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuContextW(CUcontext context, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuContextW_impl_fntype local = (nvtxNameCuContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(context, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamA(CUstream stream, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuStreamA_impl_fntype local = (nvtxNameCuStreamA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(stream, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuStreamW(CUstream stream, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuStreamW_impl_fntype local = (nvtxNameCuStreamW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(stream, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventA(CUevent event, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuEventA_impl_fntype local = (nvtxNameCuEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(event, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameCuEventW(CUevent event, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameCuEventW_impl_fntype local = (nvtxNameCuEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(event, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD_OPENCL
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
typedef void (NVTX_API * nvtxNameClDeviceA_impl_fntype)(cl_device_id device, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClDeviceW_impl_fntype)(cl_device_id device, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClContextA_impl_fntype)(cl_context context, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClContextW_impl_fntype)(cl_context context, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClCommandQueueA_impl_fntype)(cl_command_queue command_queue, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClCommandQueueW_impl_fntype)(cl_command_queue command_queue, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClMemObjectA_impl_fntype)(cl_mem memobj, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClMemObjectW_impl_fntype)(cl_mem memobj, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClSamplerA_impl_fntype)(cl_sampler sampler, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClSamplerW_impl_fntype)(cl_sampler sampler, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClProgramA_impl_fntype)(cl_program program, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClProgramW_impl_fntype)(cl_program program, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClEventA_impl_fntype)(cl_event evnt, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClEventW_impl_fntype)(cl_event evnt, const wchar_t* name);
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceA(cl_device_id device, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClDeviceA_impl_fntype local = (nvtxNameClDeviceA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(device, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClDeviceW(cl_device_id device, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClDeviceW_impl_fntype local = (nvtxNameClDeviceW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(device, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClContextA(cl_context context, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClContextA_impl_fntype local = (nvtxNameClContextA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(context, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClContextW(cl_context context, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClContextW_impl_fntype local = (nvtxNameClContextW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(context, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueA(cl_command_queue command_queue, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClCommandQueueA_impl_fntype local = (nvtxNameClCommandQueueA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(command_queue, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClCommandQueueW(cl_command_queue command_queue, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClCommandQueueW_impl_fntype local = (nvtxNameClCommandQueueW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(command_queue, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectA(cl_mem memobj, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClMemObjectA_impl_fntype local = (nvtxNameClMemObjectA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(memobj, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClMemObjectW(cl_mem memobj, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClMemObjectW_impl_fntype local = (nvtxNameClMemObjectW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(memobj, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerA(cl_sampler sampler, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClSamplerA_impl_fntype local = (nvtxNameClSamplerA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(sampler, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClSamplerW(cl_sampler sampler, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClSamplerW_impl_fntype local = (nvtxNameClSamplerW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(sampler, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramA(cl_program program, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClProgramA_impl_fntype local = (nvtxNameClProgramA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(program, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClProgramW(cl_program program, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClProgramW_impl_fntype local = (nvtxNameClProgramW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(program, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClEventA(cl_event evnt, const char* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClEventA_impl_fntype local = (nvtxNameClEventA_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(evnt, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxNameClEventW(cl_event evnt, const wchar_t* name)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxNameClEventW_impl_fntype local = (nvtxNameClEventW_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(evnt, name);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD_SYNC
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExtCuda.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
|
||||
|
||||
NVTX_DECLSPEC nvtxSyncUser_t NVTX_API nvtxDomainSyncUserCreate(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainSyncUserCreate_impl_fntype local = (nvtxDomainSyncUserCreate_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
|
||||
if(local!=0)
|
||||
return (*local)(domain, attribs);
|
||||
else
|
||||
#endif /*NVTX_DISABLE*/
|
||||
return (nvtxSyncUser_t)0;
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserDestroy(nvtxSyncUser_t handle)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainSyncUserDestroy_impl_fntype local = (nvtxDomainSyncUserDestroy_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(handle);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireStart(nvtxSyncUser_t handle)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainSyncUserAcquireStart_impl_fntype local = (nvtxDomainSyncUserAcquireStart_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(handle);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireFailed(nvtxSyncUser_t handle)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainSyncUserAcquireFailed_impl_fntype local = (nvtxDomainSyncUserAcquireFailed_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(handle);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserAcquireSuccess(nvtxSyncUser_t handle)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainSyncUserAcquireSuccess_impl_fntype local = (nvtxDomainSyncUserAcquireSuccess_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(handle);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
NVTX_DECLSPEC void NVTX_API nvtxDomainSyncUserReleasing(nvtxSyncUser_t handle)
|
||||
{
|
||||
#ifndef NVTX_DISABLE
|
||||
nvtxDomainSyncUserReleasing_impl_fntype local = (nvtxDomainSyncUserReleasing_impl_fntype)NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
|
||||
if(local!=0)
|
||||
(*local)(handle);
|
||||
#endif /*NVTX_DISABLE*/
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@@ -0,0 +1,312 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
/* ---- Platform-independent helper definitions and functions ---- */
|
||||
|
||||
/* Prefer macros over inline functions to reduce symbol resolution at link time */
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define NVTX_PATHCHAR wchar_t
|
||||
#define NVTX_STR(x) L##x
|
||||
#define NVTX_GETENV _wgetenv
|
||||
#define NVTX_BUFSIZE MAX_PATH
|
||||
#define NVTX_DLLHANDLE HMODULE
|
||||
#define NVTX_DLLOPEN(x) LoadLibraryW(x)
|
||||
#define NVTX_DLLFUNC GetProcAddress
|
||||
#define NVTX_DLLCLOSE FreeLibrary
|
||||
#define NVTX_YIELD() SwitchToThread()
|
||||
#define NVTX_MEMBAR() MemoryBarrier()
|
||||
#define NVTX_ATOMIC_WRITE_32(address, value) InterlockedExchange((volatile LONG*)address, value)
|
||||
#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) old = InterlockedCompareExchange((volatile LONG*)address, exchange, comparand)
|
||||
#elif defined(__GNUC__)
|
||||
#define NVTX_PATHCHAR char
|
||||
#define NVTX_STR(x) x
|
||||
#define NVTX_GETENV getenv
|
||||
#define NVTX_BUFSIZE PATH_MAX
|
||||
#define NVTX_DLLHANDLE void*
|
||||
#define NVTX_DLLOPEN(x) dlopen(x, RTLD_LAZY)
|
||||
#define NVTX_DLLFUNC dlsym
|
||||
#define NVTX_DLLCLOSE dlclose
|
||||
#define NVTX_YIELD() sched_yield()
|
||||
#define NVTX_MEMBAR() __sync_synchronize()
|
||||
/* Ensure full memory barrier for atomics, to match Windows functions */
|
||||
#define NVTX_ATOMIC_WRITE_32(address, value) __sync_synchronize(); __sync_lock_test_and_set(address, value)
|
||||
#define NVTX_ATOMIC_CAS_32(old, address, exchange, comparand) __sync_synchronize(); old = __sync_val_compare_and_swap(address, exchange, comparand)
|
||||
#else
|
||||
#error The library does not support your configuration!
|
||||
#endif
|
||||
|
||||
/* Define this to 1 for platforms that where pre-injected libraries can be discovered. */
|
||||
#if defined(_WIN32)
|
||||
/* TODO */
|
||||
#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
|
||||
#else
|
||||
#define NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY 0
|
||||
#endif
|
||||
|
||||
/* Define this to 1 for platforms that support environment variables */
|
||||
/* TODO: Detect UWP, a.k.a. Windows Store app, and set this to 0. */
|
||||
/* Try: #if defined(WINAPI_FAMILY_PARTITION) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) */
|
||||
#define NVTX_SUPPORT_ENV_VARS 1
|
||||
|
||||
/* Define this to 1 for platforms that support dynamic/shared libraries */
|
||||
#define NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY 1
|
||||
|
||||
/* Injection libraries implementing InitializeInjectionNvtx2 may be statically linked,
|
||||
* and this will override any dynamic injection. Useful for platforms where dynamic
|
||||
* injection is not available. Since weak symbols not explicitly marked extern are
|
||||
* guaranteed to be initialized to zero if no definitions are found by the linker, the
|
||||
* dynamic injection process proceeds normally if pfnInitializeInjectionNvtx2 is 0. */
|
||||
#if defined(__GNUC__) && !defined(_WIN32) && !defined(__CYGWIN__)
|
||||
#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 1
|
||||
/* To statically inject an NVTX library, define InitializeInjectionNvtx2_fnptr as a normal
|
||||
* symbol (not weak) pointing to the implementation of InitializeInjectionNvtx2 (which
|
||||
* does not need to be named "InitializeInjectionNvtx2" as is necessary in a dynamic
|
||||
* injection library. */
|
||||
__attribute__((weak)) NvtxInitializeInjectionNvtxFunc_t InitializeInjectionNvtx2_fnptr;
|
||||
#else
|
||||
#define NVTX_SUPPORT_STATIC_INJECTION_LIBRARY 0
|
||||
#endif
|
||||
|
||||
/* This function tries to find or load an NVTX injection library and get the
|
||||
* address of its InitializeInjection2 function. If such a function pointer
|
||||
* is found, it is called, and passed the address of this NVTX instance's
|
||||
* nvtxGetExportTable function, so the injection can attach to this instance.
|
||||
* If the initialization fails for any reason, any dynamic library loaded will
|
||||
* be freed, and all NVTX implementation functions will be set to no-ops. If
|
||||
* initialization succeeds, NVTX functions not attached to the tool will be set
|
||||
* to no-ops. This is implemented as one function instead of several small
|
||||
* functions to minimize the number of weak symbols the linker must resolve.
|
||||
* Order of search is:
|
||||
* - Pre-injected library exporting InitializeInjectionNvtx2
|
||||
* - Loadable library exporting InitializeInjectionNvtx2
|
||||
* - Path specified by env var NVTX_INJECTION??_PATH (?? is 32 or 64)
|
||||
* - On Android, libNvtxInjection??.so within the package (?? is 32 or 64)
|
||||
* - Statically-linked injection library defining InitializeInjectionNvtx2_fnptr
|
||||
*/
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void);
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)(void)
|
||||
{
|
||||
const char* const initFuncName = "InitializeInjectionNvtx2";
|
||||
NvtxInitializeInjectionNvtxFunc_t init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)0;
|
||||
NVTX_DLLHANDLE injectionLibraryHandle = (NVTX_DLLHANDLE)0;
|
||||
int entryPointStatus = 0;
|
||||
|
||||
#if NVTX_SUPPORT_ALREADY_INJECTED_LIBRARY
|
||||
/* Use POSIX global symbol chain to query for init function from any module */
|
||||
init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(0, initFuncName);
|
||||
#endif
|
||||
|
||||
#if NVTX_SUPPORT_DYNAMIC_INJECTION_LIBRARY
|
||||
/* Try discovering dynamic injection library to load */
|
||||
if (!init_fnptr)
|
||||
{
|
||||
#if NVTX_SUPPORT_ENV_VARS
|
||||
/* If env var NVTX_INJECTION64_PATH is set, it should contain the path
|
||||
* to a 64-bit dynamic NVTX injection library (and similar for 32-bit). */
|
||||
const NVTX_PATHCHAR* const nvtxEnvVarName = (sizeof(void*) == 4)
|
||||
? NVTX_STR("NVTX_INJECTION32_PATH")
|
||||
: NVTX_STR("NVTX_INJECTION64_PATH");
|
||||
#endif /* NVTX_SUPPORT_ENV_VARS */
|
||||
NVTX_PATHCHAR injectionLibraryPathBuf[NVTX_BUFSIZE];
|
||||
const NVTX_PATHCHAR* injectionLibraryPath = (const NVTX_PATHCHAR*)0;
|
||||
|
||||
/* Refer to this variable explicitly in case all references to it are #if'ed out */
|
||||
(void)injectionLibraryPathBuf;
|
||||
|
||||
#if NVTX_SUPPORT_ENV_VARS
|
||||
/* Disable the warning for getenv & _wgetenv -- this usage is safe because
|
||||
* these functions are not called again before using the returned value. */
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4996 )
|
||||
#endif
|
||||
injectionLibraryPath = NVTX_GETENV(nvtxEnvVarName);
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
if (!injectionLibraryPath)
|
||||
{
|
||||
const char *bits = (sizeof(void*) == 4) ? "32" : "64";
|
||||
char cmdlineBuf[32];
|
||||
char pkgName[PATH_MAX];
|
||||
int count;
|
||||
int pid;
|
||||
FILE *fp;
|
||||
size_t bytesRead;
|
||||
size_t pos;
|
||||
|
||||
pid = (int)getpid();
|
||||
count = snprintf(cmdlineBuf, sizeof(cmdlineBuf), "/proc/%d/cmdline", pid);
|
||||
if (count <= 0 || count >= (int)sizeof(cmdlineBuf))
|
||||
{
|
||||
NVTX_ERR("Path buffer too small for: /proc/%d/cmdline\n", pid);
|
||||
return NVTX_ERR_INIT_ACCESS_LIBRARY;
|
||||
}
|
||||
|
||||
fp = fopen(cmdlineBuf, "r");
|
||||
if (!fp)
|
||||
{
|
||||
NVTX_ERR("File couldn't be opened: %s\n", cmdlineBuf);
|
||||
return NVTX_ERR_INIT_ACCESS_LIBRARY;
|
||||
}
|
||||
|
||||
bytesRead = fread(pkgName, 1, sizeof(pkgName) - 1, fp);
|
||||
fclose(fp);
|
||||
if (bytesRead == 0)
|
||||
{
|
||||
NVTX_ERR("Package name couldn't be read from file: %s\n", cmdlineBuf);
|
||||
return NVTX_ERR_INIT_ACCESS_LIBRARY;
|
||||
}
|
||||
|
||||
pkgName[bytesRead] = 0;
|
||||
|
||||
/* String can contain colon as a process separator. In this case the package name is before the colon. */
|
||||
pos = 0;
|
||||
while (pos < bytesRead && pkgName[pos] != ':' && pkgName[pos] != '\0')
|
||||
{
|
||||
++pos;
|
||||
}
|
||||
pkgName[pos] = 0;
|
||||
|
||||
count = snprintf(injectionLibraryPathBuf, NVTX_BUFSIZE, "/data/data/%s/files/libNvtxInjection%s.so", pkgName, bits);
|
||||
if (count <= 0 || count >= NVTX_BUFSIZE)
|
||||
{
|
||||
NVTX_ERR("Path buffer too small for: /data/data/%s/files/libNvtxInjection%s.so\n", pkgName, bits);
|
||||
return NVTX_ERR_INIT_ACCESS_LIBRARY;
|
||||
}
|
||||
|
||||
/* On Android, verify path is accessible due to aggressive file access restrictions. */
|
||||
/* For dlopen, if the filename contains a leading slash, then it is interpreted as a */
|
||||
/* relative or absolute pathname; otherwise it will follow the rules in ld.so. */
|
||||
if (injectionLibraryPathBuf[0] == '/')
|
||||
{
|
||||
#if (__ANDROID_API__ < 21)
|
||||
int access_err = access(injectionLibraryPathBuf, F_OK | R_OK);
|
||||
#else
|
||||
int access_err = faccessat(AT_FDCWD, injectionLibraryPathBuf, F_OK | R_OK, 0);
|
||||
#endif
|
||||
if (access_err != 0)
|
||||
{
|
||||
NVTX_ERR("Injection library path wasn't accessible [code=%s] [path=%s]\n", strerror(errno), injectionLibraryPathBuf);
|
||||
return NVTX_ERR_INIT_ACCESS_LIBRARY;
|
||||
}
|
||||
}
|
||||
injectionLibraryPath = injectionLibraryPathBuf;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* At this point, injectionLibraryPath is specified if a dynamic
|
||||
* injection library was specified by a tool. */
|
||||
if (injectionLibraryPath)
|
||||
{
|
||||
/* Load the injection library */
|
||||
injectionLibraryHandle = NVTX_DLLOPEN(injectionLibraryPath);
|
||||
if (!injectionLibraryHandle)
|
||||
{
|
||||
NVTX_ERR("Failed to load injection library\n");
|
||||
return NVTX_ERR_INIT_LOAD_LIBRARY;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Attempt to get the injection library's entry-point */
|
||||
init_fnptr = (NvtxInitializeInjectionNvtxFunc_t)NVTX_DLLFUNC(injectionLibraryHandle, initFuncName);
|
||||
if (!init_fnptr)
|
||||
{
|
||||
NVTX_DLLCLOSE(injectionLibraryHandle);
|
||||
NVTX_ERR("Failed to get address of function InitializeInjectionNvtx2 from injection library\n");
|
||||
return NVTX_ERR_INIT_MISSING_LIBRARY_ENTRY_POINT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if NVTX_SUPPORT_STATIC_INJECTION_LIBRARY
|
||||
if (!init_fnptr)
|
||||
{
|
||||
/* Check weakly-defined function pointer. A statically-linked injection can define this as
|
||||
* a normal symbol and it will take precedence over a dynamic injection. */
|
||||
if (InitializeInjectionNvtx2_fnptr)
|
||||
{
|
||||
init_fnptr = InitializeInjectionNvtx2_fnptr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* At this point, if init_fnptr is not set, then no tool has specified
|
||||
* an NVTX injection library -- return non-success result so all NVTX
|
||||
* API functions will be set to no-ops. */
|
||||
if (!init_fnptr)
|
||||
{
|
||||
return NVTX_ERR_NO_INJECTION_LIBRARY_AVAILABLE;
|
||||
}
|
||||
|
||||
/* Invoke injection library's initialization function. If it returns
|
||||
* 0 (failure) and a dynamic injection was loaded, unload it. */
|
||||
entryPointStatus = init_fnptr(NVTX_VERSIONED_IDENTIFIER(nvtxGetExportTable));
|
||||
if (entryPointStatus == 0)
|
||||
{
|
||||
NVTX_ERR("Failed to initialize injection library -- initialization function returned 0\n");
|
||||
if (injectionLibraryHandle)
|
||||
{
|
||||
NVTX_DLLCLOSE(injectionLibraryHandle);
|
||||
}
|
||||
return NVTX_ERR_INIT_FAILED_LIBRARY_ENTRY_POINT;
|
||||
}
|
||||
|
||||
return NVTX_SUCCESS;
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)(void)
|
||||
{
|
||||
unsigned int old;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState == NVTX_INIT_STATE_COMPLETE)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
NVTX_ATOMIC_CAS_32(
|
||||
old,
|
||||
&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
|
||||
NVTX_INIT_STATE_STARTED,
|
||||
NVTX_INIT_STATE_FRESH);
|
||||
if (old == NVTX_INIT_STATE_FRESH)
|
||||
{
|
||||
int result;
|
||||
int forceAllToNoops;
|
||||
|
||||
/* Load & initialize injection library -- it will assign the function pointers */
|
||||
result = NVTX_VERSIONED_IDENTIFIER(nvtxInitializeInjectionLibrary)();
|
||||
|
||||
/* Set all pointers not assigned by the injection to null */
|
||||
forceAllToNoops = result != NVTX_SUCCESS; /* Set all to null if injection init failed */
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(forceAllToNoops);
|
||||
|
||||
/* Signal that initialization has finished, so now the assigned function pointers will be used */
|
||||
NVTX_ATOMIC_WRITE_32(
|
||||
&NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState,
|
||||
NVTX_INIT_STATE_COMPLETE);
|
||||
}
|
||||
else /* Spin-wait until initialization has finished */
|
||||
{
|
||||
NVTX_MEMBAR();
|
||||
while (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).initState != NVTX_INIT_STATE_COMPLETE)
|
||||
{
|
||||
NVTX_YIELD();
|
||||
NVTX_MEMBAR();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name);
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name);
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name);
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name);
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved);
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle);
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle);
|
||||
@@ -0,0 +1,573 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExt.h (except when NVTX_NO_IMPL is defined).
|
||||
#endif
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxMarkEx(eventAttrib);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init)(const char* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxMarkA(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init)(const wchar_t* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxMarkW(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangeStartEx(eventAttrib);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init)(const char* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangeStartA(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init)(const wchar_t* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangeStartW(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init)(nvtxRangeId_t id){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxRangeEnd(id);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init)(const nvtxEventAttributes_t* eventAttrib){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangePushEx(eventAttrib);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init)(const char* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangePushA(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init)(const wchar_t* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangePushW(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init)(void){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxRangePop();
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init)(uint32_t category, const char* name){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxNameCategoryA(category, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init)(uint32_t category, const wchar_t* name){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxNameCategoryW(category, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init)(uint32_t threadId, const char* name){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxNameOsThreadA(threadId, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init)(uint32_t threadId, const wchar_t* name){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxNameOsThreadW(threadId, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxDomainMarkEx(domain, eventAttrib);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxRangeId_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainRangeStartEx(domain, eventAttrib);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init)(nvtxDomainHandle_t domain, nvtxRangeId_t id){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxDomainRangeEnd(domain, id);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainRangePushEx(domain, eventAttrib);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION int NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init)(nvtxDomainHandle_t domain){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainRangePop(domain);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxResourceHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainResourceCreate(domain, attribs);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init)(nvtxResourceHandle_t resource){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxDomainResourceDestroy(resource);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const char* name){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxDomainNameCategoryA(domain, category, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxDomainNameCategoryW(domain, category, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init)(nvtxDomainHandle_t domain, const char* string){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainRegisterStringA(domain, string);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxStringHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init)(nvtxDomainHandle_t domain, const wchar_t* string){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainRegisterStringW(domain, string);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init)(const char* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainCreateA(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxDomainHandle_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init)(const wchar_t* message){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
return nvtxDomainCreateW(message);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init)(nvtxDomainHandle_t domain){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxDomainDestroy(domain);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init)(const void* reserved){
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
nvtxInitialize(reserved);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init)(nvtx_CUdevice device, const char* name){
|
||||
nvtxNameCuDeviceA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr;
|
||||
if (local)
|
||||
local(device, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init)(nvtx_CUdevice device, const wchar_t* name){
|
||||
nvtxNameCuDeviceW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr;
|
||||
if (local)
|
||||
local(device, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init)(nvtx_CUcontext context, const char* name){
|
||||
nvtxNameCuContextA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr;
|
||||
if (local)
|
||||
local(context, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init)(nvtx_CUcontext context, const wchar_t* name){
|
||||
nvtxNameCuContextW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr;
|
||||
if (local)
|
||||
local(context, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init)(nvtx_CUstream stream, const char* name){
|
||||
nvtxNameCuStreamA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr;
|
||||
if (local)
|
||||
local(stream, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init)(nvtx_CUstream stream, const wchar_t* name){
|
||||
nvtxNameCuStreamW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr;
|
||||
if (local)
|
||||
local(stream, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init)(nvtx_CUevent event, const char* name){
|
||||
nvtxNameCuEventA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr;
|
||||
if (local)
|
||||
local(event, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init)(nvtx_CUevent event, const wchar_t* name){
|
||||
nvtxNameCuEventW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr;
|
||||
if (local)
|
||||
local(event, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init)(int device, const char* name){
|
||||
nvtxNameCudaDeviceA_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr;
|
||||
if (local)
|
||||
local(device, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init)(int device, const wchar_t* name){
|
||||
nvtxNameCudaDeviceW_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr;
|
||||
if (local)
|
||||
local(device, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init)(nvtx_cudaStream_t stream, const char* name){
|
||||
nvtxNameCudaStreamA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr;
|
||||
if (local)
|
||||
local(stream, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init)(nvtx_cudaStream_t stream, const wchar_t* name){
|
||||
nvtxNameCudaStreamW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr;
|
||||
if (local)
|
||||
local(stream, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init)(nvtx_cudaEvent_t event, const char* name){
|
||||
nvtxNameCudaEventA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr;
|
||||
if (local)
|
||||
local(event, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init)(nvtx_cudaEvent_t event, const wchar_t* name){
|
||||
nvtxNameCudaEventW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr;
|
||||
if (local)
|
||||
local(event, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init)(nvtx_cl_device_id device, const char* name){
|
||||
nvtxNameClDeviceA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr;
|
||||
if (local)
|
||||
local(device, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init)(nvtx_cl_device_id device, const wchar_t* name){
|
||||
nvtxNameClDeviceW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr;
|
||||
if (local)
|
||||
local(device, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init)(nvtx_cl_context context, const char* name){
|
||||
nvtxNameClContextA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr;
|
||||
if (local)
|
||||
local(context, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init)(nvtx_cl_context context, const wchar_t* name){
|
||||
nvtxNameClContextW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr;
|
||||
if (local)
|
||||
local(context, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init)(nvtx_cl_command_queue command_queue, const char* name){
|
||||
nvtxNameClCommandQueueA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr;
|
||||
if (local)
|
||||
local(command_queue, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init)(nvtx_cl_command_queue command_queue, const wchar_t* name){
|
||||
nvtxNameClCommandQueueW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr;
|
||||
if (local)
|
||||
local(command_queue, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init)(nvtx_cl_mem memobj, const char* name){
|
||||
nvtxNameClMemObjectA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr;
|
||||
if (local)
|
||||
local(memobj, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init)(nvtx_cl_mem memobj, const wchar_t* name){
|
||||
nvtxNameClMemObjectW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr;
|
||||
if (local)
|
||||
local(memobj, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init)(nvtx_cl_sampler sampler, const char* name){
|
||||
nvtxNameClSamplerA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr;
|
||||
if (local)
|
||||
local(sampler, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init)(nvtx_cl_sampler sampler, const wchar_t* name){
|
||||
nvtxNameClSamplerW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr;
|
||||
if (local)
|
||||
local(sampler, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init)(nvtx_cl_program program, const char* name){
|
||||
nvtxNameClProgramA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr;
|
||||
if (local)
|
||||
local(program, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init)(nvtx_cl_program program, const wchar_t* name){
|
||||
nvtxNameClProgramW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr;
|
||||
if (local)
|
||||
local(program, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init)(nvtx_cl_event evnt, const char* name){
|
||||
nvtxNameClEventA_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr;
|
||||
if (local)
|
||||
local(evnt, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init)(nvtx_cl_event evnt, const wchar_t* name){
|
||||
nvtxNameClEventW_fakeimpl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr;
|
||||
if (local)
|
||||
local(evnt, name);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION nvtxSyncUser_t NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs){
|
||||
nvtxDomainSyncUserCreate_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr;
|
||||
if (local) {
|
||||
return local(domain, attribs);
|
||||
}
|
||||
return (nvtxSyncUser_t)0;
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init)(nvtxSyncUser_t handle){
|
||||
nvtxDomainSyncUserDestroy_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr;
|
||||
if (local)
|
||||
local(handle);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init)(nvtxSyncUser_t handle){
|
||||
nvtxDomainSyncUserAcquireStart_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr;
|
||||
if (local)
|
||||
local(handle);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init)(nvtxSyncUser_t handle){
|
||||
nvtxDomainSyncUserAcquireFailed_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr;
|
||||
if (local)
|
||||
local(handle);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init)(nvtxSyncUser_t handle){
|
||||
nvtxDomainSyncUserAcquireSuccess_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr;
|
||||
if (local)
|
||||
local(handle);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_API NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init)(nvtxSyncUser_t handle){
|
||||
nvtxDomainSyncUserReleasing_impl_fntype local;
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxInitOnce)();
|
||||
local = NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr;
|
||||
if (local)
|
||||
local(handle);
|
||||
}
|
||||
|
||||
NVTX_LINKONCE_FWDDECL_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops);
|
||||
NVTX_LINKONCE_DEFINE_FUNCTION void NVTX_VERSIONED_IDENTIFIER(nvtxSetInitFunctionsToNoops)(int forceAllToNoops)
|
||||
{
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkEx_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkEx_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxMarkW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxMarkW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartEx_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartEx_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeStartW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeStartW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangeEnd_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangeEnd_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushEx_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushEx_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePushW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePushW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxRangePop_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxRangePop_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCategoryW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCategoryW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameOsThreadW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameOsThreadW_impl_fnptr = NULL;
|
||||
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuDeviceW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuDeviceW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuContextW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuContextW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuStreamW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuStreamW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCuEventW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCuEventW_impl_fnptr = NULL;
|
||||
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClDeviceW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClDeviceW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClContextW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClContextW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClCommandQueueW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClCommandQueueW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClMemObjectW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClMemObjectW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClSamplerW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClSamplerW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClProgramW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClProgramW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameClEventW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameClEventW_impl_fnptr = NULL;
|
||||
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaDeviceW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaDeviceW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaStreamW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaStreamW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxNameCudaEventW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxNameCudaEventW_impl_fnptr = NULL;
|
||||
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainMarkEx_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainMarkEx_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeStartEx_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeStartEx_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangeEnd_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangeEnd_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePushEx_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePushEx_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRangePop_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRangePop_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceCreate_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceCreate_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainResourceDestroy_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainResourceDestroy_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainNameCategoryW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainNameCategoryW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainRegisterStringW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainRegisterStringW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateA_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateA_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainCreateW_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainCreateW_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainDestroy_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainDestroy_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxInitialize_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxInitialize_impl_fnptr = NULL;
|
||||
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserCreate_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserCreate_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserDestroy_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserDestroy_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireStart_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireStart_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireFailed_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireFailed_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserAcquireSuccess_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserAcquireSuccess_impl_fnptr = NULL;
|
||||
if (NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr == NVTX_VERSIONED_IDENTIFIER(nvtxDomainSyncUserReleasing_impl_init) || forceAllToNoops)
|
||||
NVTX_VERSIONED_IDENTIFIER(nvtxGlobals).nvtxDomainSyncUserReleasing_impl_fnptr = NULL;
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
#ifndef __NVTX_LINKONCE_H__
|
||||
#define __NVTX_LINKONCE_H__
|
||||
|
||||
/* This header defines macros to permit making definitions of global variables
|
||||
* and functions in C/C++ header files which may be included multiple times in
|
||||
* a translation unit or linkage unit. It allows authoring header-only libraries
|
||||
* which can be used by multiple other header-only libraries (either as the same
|
||||
* copy or multiple copies), and does not require any build changes, such as
|
||||
* adding another .c file, linking a static library, or deploying a dynamic
|
||||
* library. Globals defined with these macros have the property that they have
|
||||
* the same address, pointing to a single instance, for the entire linkage unit.
|
||||
* It is expected but not guaranteed that each linkage unit will have a separate
|
||||
* instance.
|
||||
*
|
||||
* In some situations it is desirable to declare a variable without initializing
|
||||
* it, refer to it in code or other variables' initializers, and then initialize
|
||||
* it later. Similarly, functions can be prototyped, have their address taken,
|
||||
* and then have their body defined later. In such cases, use the FWDDECL macros
|
||||
* when forward-declaring LINKONCE global variables without initializers and
|
||||
* function prototypes, and then use the DEFINE macros when later defining them.
|
||||
* Although in many cases the FWDDECL macro is equivalent to the DEFINE macro,
|
||||
* following this pattern makes code maximally portable.
|
||||
*/
|
||||
|
||||
#if defined(__MINGW32__) /* MinGW */
|
||||
#define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0.")))
|
||||
#if defined(__cplusplus)
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline NVTX_LINKONCE_WEAK
|
||||
#else
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
|
||||
#endif
|
||||
#elif defined(_MSC_VER) /* MSVC */
|
||||
#if defined(__cplusplus)
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL extern "C" __declspec(selectany)
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
|
||||
#else
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION __inline
|
||||
#endif
|
||||
#elif defined(__CYGWIN__) && defined(__clang__) /* Clang on Cygwin */
|
||||
#define NVTX_LINKONCE_WEAK __attribute__((section(".gnu.linkonce.0.")))
|
||||
#if defined(__cplusplus)
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_WEAK
|
||||
#else
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
|
||||
#endif
|
||||
#elif defined(__CYGWIN__) /* Assume GCC or compatible */
|
||||
#define NVTX_LINKONCE_WEAK __attribute__((weak))
|
||||
#if defined(__cplusplus)
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL __declspec(selectany)
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" inline
|
||||
#else
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_WEAK
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_WEAK
|
||||
#endif
|
||||
#else /* All others: Assume GCC, clang, or compatible */
|
||||
#define NVTX_LINKONCE_WEAK __attribute__((weak))
|
||||
#define NVTX_LINKONCE_HIDDEN __attribute__((visibility("hidden")))
|
||||
#if defined(__cplusplus)
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION extern "C" NVTX_LINKONCE_HIDDEN inline
|
||||
#else
|
||||
#define NVTX_LINKONCE_DEFINE_GLOBAL NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
|
||||
#define NVTX_LINKONCE_DEFINE_FUNCTION NVTX_LINKONCE_HIDDEN NVTX_LINKONCE_WEAK
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define NVTX_LINKONCE_FWDDECL_GLOBAL NVTX_LINKONCE_DEFINE_GLOBAL extern
|
||||
#define NVTX_LINKONCE_FWDDECL_FUNCTION NVTX_LINKONCE_DEFINE_FUNCTION
|
||||
|
||||
#endif /* __NVTX_LINKONCE_H__ */
|
||||
@@ -0,0 +1,304 @@
|
||||
/*
|
||||
* Copyright 2009-2020 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*/
|
||||
|
||||
/* This header defines types which are used by the internal implementation
|
||||
* of NVTX and callback subscribers. API clients do not use these types,
|
||||
* so they are defined here instead of in nvToolsExt.h to clarify they are
|
||||
* not part of the NVTX client API. */
|
||||
|
||||
#ifndef NVTX_IMPL_GUARD
|
||||
#error Never include this file directly -- it is automatically included by nvToolsExt.h.
|
||||
#endif
|
||||
|
||||
/* ------ Dependency-free types binary-compatible with real types ------- */
|
||||
|
||||
/* In order to avoid having the NVTX core API headers depend on non-NVTX
|
||||
* headers like cuda.h, NVTX defines binary-compatible types to use for
|
||||
* safely making the initialization versions of all NVTX functions without
|
||||
* needing to have definitions for the real types. */
|
||||
|
||||
typedef int nvtx_CUdevice;
|
||||
typedef void* nvtx_CUcontext;
|
||||
typedef void* nvtx_CUstream;
|
||||
typedef void* nvtx_CUevent;
|
||||
|
||||
typedef void* nvtx_cudaStream_t;
|
||||
typedef void* nvtx_cudaEvent_t;
|
||||
|
||||
typedef void* nvtx_cl_platform_id;
|
||||
typedef void* nvtx_cl_device_id;
|
||||
typedef void* nvtx_cl_context;
|
||||
typedef void* nvtx_cl_command_queue;
|
||||
typedef void* nvtx_cl_mem;
|
||||
typedef void* nvtx_cl_program;
|
||||
typedef void* nvtx_cl_kernel;
|
||||
typedef void* nvtx_cl_event;
|
||||
typedef void* nvtx_cl_sampler;
|
||||
|
||||
typedef struct nvtxSyncUser* nvtxSyncUser_t;
|
||||
struct nvtxSyncUserAttributes_v0;
|
||||
typedef struct nvtxSyncUserAttributes_v0 nvtxSyncUserAttributes_t;
|
||||
|
||||
/* --------- Types for function pointers (with fake API types) ---------- */
|
||||
|
||||
typedef void (NVTX_API * nvtxMarkEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
|
||||
typedef void (NVTX_API * nvtxMarkA_impl_fntype)(const char* message);
|
||||
typedef void (NVTX_API * nvtxMarkW_impl_fntype)(const wchar_t* message);
|
||||
typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
|
||||
typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartA_impl_fntype)(const char* message);
|
||||
typedef nvtxRangeId_t (NVTX_API * nvtxRangeStartW_impl_fntype)(const wchar_t* message);
|
||||
typedef void (NVTX_API * nvtxRangeEnd_impl_fntype)(nvtxRangeId_t id);
|
||||
typedef int (NVTX_API * nvtxRangePushEx_impl_fntype)(const nvtxEventAttributes_t* eventAttrib);
|
||||
typedef int (NVTX_API * nvtxRangePushA_impl_fntype)(const char* message);
|
||||
typedef int (NVTX_API * nvtxRangePushW_impl_fntype)(const wchar_t* message);
|
||||
typedef int (NVTX_API * nvtxRangePop_impl_fntype)(void);
|
||||
typedef void (NVTX_API * nvtxNameCategoryA_impl_fntype)(uint32_t category, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCategoryW_impl_fntype)(uint32_t category, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameOsThreadA_impl_fntype)(uint32_t threadId, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameOsThreadW_impl_fntype)(uint32_t threadId, const wchar_t* name);
|
||||
|
||||
/* Real impl types are defined in nvtxImplCuda_v3.h, where CUDA headers are included */
|
||||
typedef void (NVTX_API * nvtxNameCuDeviceA_fakeimpl_fntype)(nvtx_CUdevice device, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuDeviceW_fakeimpl_fntype)(nvtx_CUdevice device, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCuContextA_fakeimpl_fntype)(nvtx_CUcontext context, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuContextW_fakeimpl_fntype)(nvtx_CUcontext context, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCuStreamA_fakeimpl_fntype)(nvtx_CUstream stream, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuStreamW_fakeimpl_fntype)(nvtx_CUstream stream, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCuEventA_fakeimpl_fntype)(nvtx_CUevent event, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCuEventW_fakeimpl_fntype)(nvtx_CUevent event, const wchar_t* name);
|
||||
|
||||
/* Real impl types are defined in nvtxImplOpenCL_v3.h, where OPENCL headers are included */
|
||||
typedef void (NVTX_API * nvtxNameClDeviceA_fakeimpl_fntype)(nvtx_cl_device_id device, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClDeviceW_fakeimpl_fntype)(nvtx_cl_device_id device, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClContextA_fakeimpl_fntype)(nvtx_cl_context context, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClContextW_fakeimpl_fntype)(nvtx_cl_context context, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClCommandQueueA_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClCommandQueueW_fakeimpl_fntype)(nvtx_cl_command_queue command_queue, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClMemObjectA_fakeimpl_fntype)(nvtx_cl_mem memobj, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClMemObjectW_fakeimpl_fntype)(nvtx_cl_mem memobj, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClSamplerA_fakeimpl_fntype)(nvtx_cl_sampler sampler, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClSamplerW_fakeimpl_fntype)(nvtx_cl_sampler sampler, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClProgramA_fakeimpl_fntype)(nvtx_cl_program program, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClProgramW_fakeimpl_fntype)(nvtx_cl_program program, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameClEventA_fakeimpl_fntype)(nvtx_cl_event evnt, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameClEventW_fakeimpl_fntype)(nvtx_cl_event evnt, const wchar_t* name);
|
||||
|
||||
/* Real impl types are defined in nvtxImplCudaRt_v3.h, where CUDART headers are included */
|
||||
typedef void (NVTX_API * nvtxNameCudaDeviceA_impl_fntype)(int device, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaDeviceW_impl_fntype)(int device, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaStreamA_fakeimpl_fntype)(nvtx_cudaStream_t stream, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaStreamW_fakeimpl_fntype)(nvtx_cudaStream_t stream, const wchar_t* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaEventA_fakeimpl_fntype)(nvtx_cudaEvent_t event, const char* name);
|
||||
typedef void (NVTX_API * nvtxNameCudaEventW_fakeimpl_fntype)(nvtx_cudaEvent_t event, const wchar_t* name);
|
||||
|
||||
typedef void (NVTX_API * nvtxDomainMarkEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
|
||||
typedef nvtxRangeId_t (NVTX_API * nvtxDomainRangeStartEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
|
||||
typedef void (NVTX_API * nvtxDomainRangeEnd_impl_fntype)(nvtxDomainHandle_t domain, nvtxRangeId_t id);
|
||||
typedef int (NVTX_API * nvtxDomainRangePushEx_impl_fntype)(nvtxDomainHandle_t domain, const nvtxEventAttributes_t* eventAttrib);
|
||||
typedef int (NVTX_API * nvtxDomainRangePop_impl_fntype)(nvtxDomainHandle_t domain);
|
||||
typedef nvtxResourceHandle_t (NVTX_API * nvtxDomainResourceCreate_impl_fntype)(nvtxDomainHandle_t domain, nvtxResourceAttributes_t* attribs);
|
||||
typedef void (NVTX_API * nvtxDomainResourceDestroy_impl_fntype)(nvtxResourceHandle_t resource);
|
||||
typedef void (NVTX_API * nvtxDomainNameCategoryA_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const char* name);
|
||||
typedef void (NVTX_API * nvtxDomainNameCategoryW_impl_fntype)(nvtxDomainHandle_t domain, uint32_t category, const wchar_t* name);
|
||||
typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringA_impl_fntype)(nvtxDomainHandle_t domain, const char* string);
|
||||
typedef nvtxStringHandle_t (NVTX_API * nvtxDomainRegisterStringW_impl_fntype)(nvtxDomainHandle_t domain, const wchar_t* string);
|
||||
typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateA_impl_fntype)(const char* message);
|
||||
typedef nvtxDomainHandle_t (NVTX_API * nvtxDomainCreateW_impl_fntype)(const wchar_t* message);
|
||||
typedef void (NVTX_API * nvtxDomainDestroy_impl_fntype)(nvtxDomainHandle_t domain);
|
||||
typedef void (NVTX_API * nvtxInitialize_impl_fntype)(const void* reserved);
|
||||
|
||||
typedef nvtxSyncUser_t (NVTX_API * nvtxDomainSyncUserCreate_impl_fntype)(nvtxDomainHandle_t domain, const nvtxSyncUserAttributes_t* attribs);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserDestroy_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserAcquireStart_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserAcquireFailed_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserAcquireSuccess_impl_fntype)(nvtxSyncUser_t handle);
|
||||
typedef void (NVTX_API * nvtxDomainSyncUserReleasing_impl_fntype)(nvtxSyncUser_t handle);
|
||||
|
||||
/* ---------------- Types for callback subscription --------------------- */
|
||||
|
||||
typedef const void *(NVTX_API * NvtxGetExportTableFunc_t)(uint32_t exportTableId);
|
||||
typedef int (NVTX_API * NvtxInitializeInjectionNvtxFunc_t)(NvtxGetExportTableFunc_t exportTable);
|
||||
|
||||
typedef enum NvtxCallbackModule
|
||||
{
|
||||
NVTX_CB_MODULE_INVALID = 0,
|
||||
NVTX_CB_MODULE_CORE = 1,
|
||||
NVTX_CB_MODULE_CUDA = 2,
|
||||
NVTX_CB_MODULE_OPENCL = 3,
|
||||
NVTX_CB_MODULE_CUDART = 4,
|
||||
NVTX_CB_MODULE_CORE2 = 5,
|
||||
NVTX_CB_MODULE_SYNC = 6,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CB_MODULE_SIZE,
|
||||
NVTX_CB_MODULE_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackModule;
|
||||
|
||||
typedef enum NvtxCallbackIdCore
|
||||
{
|
||||
NVTX_CBID_CORE_INVALID = 0,
|
||||
NVTX_CBID_CORE_MarkEx = 1,
|
||||
NVTX_CBID_CORE_MarkA = 2,
|
||||
NVTX_CBID_CORE_MarkW = 3,
|
||||
NVTX_CBID_CORE_RangeStartEx = 4,
|
||||
NVTX_CBID_CORE_RangeStartA = 5,
|
||||
NVTX_CBID_CORE_RangeStartW = 6,
|
||||
NVTX_CBID_CORE_RangeEnd = 7,
|
||||
NVTX_CBID_CORE_RangePushEx = 8,
|
||||
NVTX_CBID_CORE_RangePushA = 9,
|
||||
NVTX_CBID_CORE_RangePushW = 10,
|
||||
NVTX_CBID_CORE_RangePop = 11,
|
||||
NVTX_CBID_CORE_NameCategoryA = 12,
|
||||
NVTX_CBID_CORE_NameCategoryW = 13,
|
||||
NVTX_CBID_CORE_NameOsThreadA = 14,
|
||||
NVTX_CBID_CORE_NameOsThreadW = 15,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CBID_CORE_SIZE,
|
||||
NVTX_CBID_CORE_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackIdCore;
|
||||
|
||||
typedef enum NvtxCallbackIdCore2
|
||||
{
|
||||
NVTX_CBID_CORE2_INVALID = 0,
|
||||
NVTX_CBID_CORE2_DomainMarkEx = 1,
|
||||
NVTX_CBID_CORE2_DomainRangeStartEx = 2,
|
||||
NVTX_CBID_CORE2_DomainRangeEnd = 3,
|
||||
NVTX_CBID_CORE2_DomainRangePushEx = 4,
|
||||
NVTX_CBID_CORE2_DomainRangePop = 5,
|
||||
NVTX_CBID_CORE2_DomainResourceCreate = 6,
|
||||
NVTX_CBID_CORE2_DomainResourceDestroy = 7,
|
||||
NVTX_CBID_CORE2_DomainNameCategoryA = 8,
|
||||
NVTX_CBID_CORE2_DomainNameCategoryW = 9,
|
||||
NVTX_CBID_CORE2_DomainRegisterStringA = 10,
|
||||
NVTX_CBID_CORE2_DomainRegisterStringW = 11,
|
||||
NVTX_CBID_CORE2_DomainCreateA = 12,
|
||||
NVTX_CBID_CORE2_DomainCreateW = 13,
|
||||
NVTX_CBID_CORE2_DomainDestroy = 14,
|
||||
NVTX_CBID_CORE2_Initialize = 15,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CBID_CORE2_SIZE,
|
||||
NVTX_CBID_CORE2_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackIdCore2;
|
||||
|
||||
typedef enum NvtxCallbackIdCuda
|
||||
{
|
||||
NVTX_CBID_CUDA_INVALID = 0,
|
||||
NVTX_CBID_CUDA_NameCuDeviceA = 1,
|
||||
NVTX_CBID_CUDA_NameCuDeviceW = 2,
|
||||
NVTX_CBID_CUDA_NameCuContextA = 3,
|
||||
NVTX_CBID_CUDA_NameCuContextW = 4,
|
||||
NVTX_CBID_CUDA_NameCuStreamA = 5,
|
||||
NVTX_CBID_CUDA_NameCuStreamW = 6,
|
||||
NVTX_CBID_CUDA_NameCuEventA = 7,
|
||||
NVTX_CBID_CUDA_NameCuEventW = 8,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CBID_CUDA_SIZE,
|
||||
NVTX_CBID_CUDA_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackIdCuda;
|
||||
|
||||
typedef enum NvtxCallbackIdCudaRt
|
||||
{
|
||||
NVTX_CBID_CUDART_INVALID = 0,
|
||||
NVTX_CBID_CUDART_NameCudaDeviceA = 1,
|
||||
NVTX_CBID_CUDART_NameCudaDeviceW = 2,
|
||||
NVTX_CBID_CUDART_NameCudaStreamA = 3,
|
||||
NVTX_CBID_CUDART_NameCudaStreamW = 4,
|
||||
NVTX_CBID_CUDART_NameCudaEventA = 5,
|
||||
NVTX_CBID_CUDART_NameCudaEventW = 6,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CBID_CUDART_SIZE,
|
||||
NVTX_CBID_CUDART_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackIdCudaRt;
|
||||
|
||||
typedef enum NvtxCallbackIdOpenCL
|
||||
{
|
||||
NVTX_CBID_OPENCL_INVALID = 0,
|
||||
NVTX_CBID_OPENCL_NameClDeviceA = 1,
|
||||
NVTX_CBID_OPENCL_NameClDeviceW = 2,
|
||||
NVTX_CBID_OPENCL_NameClContextA = 3,
|
||||
NVTX_CBID_OPENCL_NameClContextW = 4,
|
||||
NVTX_CBID_OPENCL_NameClCommandQueueA = 5,
|
||||
NVTX_CBID_OPENCL_NameClCommandQueueW = 6,
|
||||
NVTX_CBID_OPENCL_NameClMemObjectA = 7,
|
||||
NVTX_CBID_OPENCL_NameClMemObjectW = 8,
|
||||
NVTX_CBID_OPENCL_NameClSamplerA = 9,
|
||||
NVTX_CBID_OPENCL_NameClSamplerW = 10,
|
||||
NVTX_CBID_OPENCL_NameClProgramA = 11,
|
||||
NVTX_CBID_OPENCL_NameClProgramW = 12,
|
||||
NVTX_CBID_OPENCL_NameClEventA = 13,
|
||||
NVTX_CBID_OPENCL_NameClEventW = 14,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CBID_OPENCL_SIZE,
|
||||
NVTX_CBID_OPENCL_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackIdOpenCL;
|
||||
|
||||
typedef enum NvtxCallbackIdSync
|
||||
{
|
||||
NVTX_CBID_SYNC_INVALID = 0,
|
||||
NVTX_CBID_SYNC_DomainSyncUserCreate = 1,
|
||||
NVTX_CBID_SYNC_DomainSyncUserDestroy = 2,
|
||||
NVTX_CBID_SYNC_DomainSyncUserAcquireStart = 3,
|
||||
NVTX_CBID_SYNC_DomainSyncUserAcquireFailed = 4,
|
||||
NVTX_CBID_SYNC_DomainSyncUserAcquireSuccess = 5,
|
||||
NVTX_CBID_SYNC_DomainSyncUserReleasing = 6,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_CBID_SYNC_SIZE,
|
||||
NVTX_CBID_SYNC_FORCE_INT = 0x7fffffff
|
||||
} NvtxCallbackIdSync;
|
||||
|
||||
/* IDs for NVTX Export Tables */
|
||||
typedef enum NvtxExportTableID
|
||||
{
|
||||
NVTX_ETID_INVALID = 0,
|
||||
NVTX_ETID_CALLBACKS = 1,
|
||||
NVTX_ETID_RESERVED0 = 2,
|
||||
NVTX_ETID_VERSIONINFO = 3,
|
||||
/* --- New constants must only be added directly above this line --- */
|
||||
NVTX_ETID_SIZE,
|
||||
NVTX_ETID_FORCE_INT = 0x7fffffff
|
||||
} NvtxExportTableID;
|
||||
|
||||
typedef void (* NvtxFunctionPointer)(void); /* generic uncallable function pointer, must be casted to appropriate function type */
|
||||
typedef NvtxFunctionPointer** NvtxFunctionTable; /* double pointer because array(1) of pointers(2) to function pointers */
|
||||
|
||||
typedef struct NvtxExportTableCallbacks
|
||||
{
|
||||
size_t struct_size;
|
||||
|
||||
/* returns an array of pointer to function pointers*/
|
||||
int (NVTX_API *GetModuleFunctionTable)(
|
||||
NvtxCallbackModule module,
|
||||
NvtxFunctionTable* out_table,
|
||||
unsigned int* out_size);
|
||||
} NvtxExportTableCallbacks;
|
||||
|
||||
typedef struct NvtxExportTableVersionInfo
|
||||
{
|
||||
/* sizeof(NvtxExportTableVersionInfo) */
|
||||
size_t struct_size;
|
||||
|
||||
/* The API version comes from the NVTX library linked to the app. The
|
||||
* injection library is can use this info to make some assumptions */
|
||||
uint32_t version;
|
||||
|
||||
/* Reserved for alignment, do not use */
|
||||
uint32_t reserved0;
|
||||
|
||||
/* This must be set by tools when attaching to provide applications
|
||||
* the ability to, in emergency situations, detect problematic tools
|
||||
* versions and modify the NVTX source to prevent attaching anything
|
||||
* that causes trouble in the app. Currently, this value is ignored. */
|
||||
void (NVTX_API *SetInjectionNvtxVersion)(
|
||||
uint32_t version);
|
||||
} NvtxExportTableVersionInfo;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Executable
@@ -0,0 +1,15 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
|
||||
#ifndef NCCL_NVTX_STUB_H_
|
||||
#define NCCL_NVTX_STUB_H_
|
||||
|
||||
struct nccl_domain{static constexpr char const* name{"NCCL"};};
|
||||
|
||||
#define NVTX3_FUNC_RANGE_IN(domain)
|
||||
#define nvtxNameOsThreadA(syscall, thread)
|
||||
|
||||
#endif
|
||||
+25
-14
@@ -10,23 +10,34 @@
|
||||
#define NCCL_P2P_H_
|
||||
|
||||
struct ncclP2Pinfo {
|
||||
const void* sendbuff;
|
||||
void* recvbuff;
|
||||
ssize_t sendbytes;
|
||||
ssize_t recvbytes;
|
||||
};
|
||||
|
||||
struct ncclP2PConnect {
|
||||
int nrecv[MAXCHANNELS];
|
||||
int nsend[MAXCHANNELS];
|
||||
int* recv;
|
||||
int* send;
|
||||
void* buff;
|
||||
ssize_t nbytes;
|
||||
struct ncclP2Pinfo* next;
|
||||
};
|
||||
|
||||
struct ncclP2Plist {
|
||||
struct ncclP2Pinfo *peerlist;
|
||||
int count;
|
||||
struct ncclP2PConnect connect;
|
||||
struct ncclP2Pinfo *head;
|
||||
struct ncclP2Pinfo *tail;
|
||||
};
|
||||
|
||||
static ncclResult_t enqueueP2pInfo(ncclP2Plist* p2p, void* buff, ssize_t nBytes) {
|
||||
if (p2p == NULL) return ncclInternalError;
|
||||
struct ncclP2Pinfo* next;
|
||||
NCCLCHECK(ncclCalloc(&next, 1));
|
||||
next->buff = buff;
|
||||
next->nbytes = nBytes;
|
||||
if (p2p->tail != NULL) p2p->tail->next = next;
|
||||
p2p->tail = next;
|
||||
if (p2p->head == NULL) p2p->head = next;
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
static ncclResult_t dequeueP2pInfo(ncclP2Plist* p2p) {
|
||||
if (p2p == NULL) return ncclInternalError;
|
||||
struct ncclP2Pinfo* temp = p2p->head;
|
||||
p2p->head = p2p->head->next;
|
||||
if (p2p->tail == temp) p2p->tail = NULL;
|
||||
free(temp);
|
||||
return ncclSuccess;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -31,10 +31,11 @@ static void setEnvFile(const char* fileName) {
|
||||
int s=0; // Env Var Size
|
||||
while (line[s] != '\0' && line[s] != '=') s++;
|
||||
if (line[s] == '\0') continue;
|
||||
strncpy(envVar, line, std::min(1024,s));
|
||||
strncpy(envVar, line, std::min(1023,s));
|
||||
envVar[s] = '\0';
|
||||
s++;
|
||||
strncpy(envValue, line+s, 1024);
|
||||
strncpy(envValue, line+s, 1023);
|
||||
envValue[1023]='\0';
|
||||
setenv(envVar, envValue, 0);
|
||||
}
|
||||
if (line) free(line);
|
||||
|
||||
@@ -18,18 +18,23 @@ struct ncclProxyArgs {
|
||||
proxyProgressFunc_t progress;
|
||||
struct ncclChannel* channel;
|
||||
struct ncclConnector* connector;
|
||||
size_t sendbytes;
|
||||
size_t recvbytes;
|
||||
int sliceSteps;
|
||||
int chunkSteps;
|
||||
int nsteps;
|
||||
uint64_t opCount;
|
||||
int protocol;
|
||||
int segment; // Only for profiling
|
||||
ncclDataType_t dtype;
|
||||
ncclRedOp_t redOp;
|
||||
int state; // add component before this line -- it is left out during initialization
|
||||
|
||||
// Internal state
|
||||
uint64_t head;
|
||||
uint64_t tail;
|
||||
uint64_t posted;
|
||||
uint64_t received; // Only used by recv proxy to wait for flush.
|
||||
uint64_t transmitted;
|
||||
uint64_t done;
|
||||
uint64_t end;
|
||||
void* requests[NCCL_STEPS];
|
||||
int idle;
|
||||
@@ -38,14 +43,30 @@ struct ncclProxyArgs {
|
||||
pthread_mutex_t mutex;
|
||||
struct ncclProxyArgs* next;
|
||||
struct ncclProxyArgs* nextPeer;
|
||||
struct ncclProxyArgs* nextGroup;
|
||||
struct ncclProxyArgs** proxyAppendPtr;
|
||||
};
|
||||
|
||||
struct ncclProxySharedBuffers {
|
||||
int nslots;
|
||||
int slotSize;
|
||||
char* cudaBuff[2*MAXCHANNELS];
|
||||
int* cudaUsed[2*MAXCHANNELS];
|
||||
char* hostBuff[2*MAXCHANNELS];
|
||||
int* hostUsed[2*MAXCHANNELS];
|
||||
struct ncclProxyArgs* proxyAppend[2*MAXCHANNELS]; // Separate send and recv
|
||||
};
|
||||
|
||||
struct ncclProxyPool;
|
||||
struct ncclProxyState {
|
||||
pthread_cond_t cond;
|
||||
pthread_mutex_t mutex;
|
||||
pthread_mutex_t opsMutex;
|
||||
pthread_mutex_t poolMutex;
|
||||
bool stop;
|
||||
struct ncclProxySharedBuffers* sharedBuffs;
|
||||
struct ncclProxyArgs* ops;
|
||||
struct ncclProxyArgs* nextOps;
|
||||
struct ncclProxyArgs* nextOpsEnd;
|
||||
struct ncclProxyArgs* pool;
|
||||
struct ncclProxyPool* pools;
|
||||
};
|
||||
@@ -59,12 +80,16 @@ enum proxyMode {
|
||||
};
|
||||
|
||||
ncclResult_t ncclProxySaveColl(struct ncclProxyArgs* args, int pattern, int root, int nranks);
|
||||
ncclResult_t ncclProxySaveP2p(struct ncclInfo* info, struct ncclChannel* channel);
|
||||
ncclResult_t ncclProxySaveA2a(struct ncclProxyArgs* args, struct ncclInfo* info);
|
||||
ncclResult_t ncclProxySaveP2p(struct ncclInfo* info, struct ncclChannel* channel, int segment);
|
||||
ncclResult_t ncclProxyStart(struct ncclComm* comm);
|
||||
ncclResult_t ncclProxyCreate(struct ncclComm* comm);
|
||||
ncclResult_t ncclProxyDestroy(struct ncclComm* comm);
|
||||
|
||||
ncclResult_t ncclProxySharedBuffersInit(struct ncclComm* comm, int cuda, int* size, char** ptr);
|
||||
ncclResult_t ncclProxySharedBuffersAlloc(struct ncclComm* comm, int cuda, int type, int channel, int size, char** ptr);
|
||||
ncclResult_t ncclProxySharedBuffersFree(struct ncclComm* comm, int cuda, int type, int channel, int size, char* ptr);
|
||||
ncclResult_t ncclProxySharedBuffersDestroy(struct ncclComm* comm);
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
// Spin wait until func evaluates to true
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
@@ -21,6 +21,7 @@
|
||||
#define SLEEP_INT 1000 // connection retry sleep interval in usec
|
||||
#define RETRY_REFUSED_TIMES 2e4 // connection refused retry times before reporting a timeout (20 sec)
|
||||
#define RETRY_TIMEDOUT_TIMES 3 // connection timed out retry times (each one can take 20s)
|
||||
#define SOCKET_NAME_MAXLEN (NI_MAXHOST+NI_MAXSERV)
|
||||
|
||||
/* Common socket address storage structure for IPv4/IPv6 */
|
||||
union socketAddress {
|
||||
@@ -64,7 +65,7 @@ static inline int envSocketFamily(void) {
|
||||
|
||||
static int findInterfaces(const char* prefixList, char* names, union socketAddress *addrs, int sock_family, int maxIfNameSize, int maxIfs) {
|
||||
#ifdef ENABLE_TRACE
|
||||
char line[1024];
|
||||
char line[SOCKET_NAME_MAXLEN+1];
|
||||
#endif
|
||||
struct netIf userIfs[MAX_IFS];
|
||||
bool searchNot = prefixList && prefixList[0] == '^';
|
||||
@@ -167,9 +168,9 @@ static bool matchSubnet(struct ifaddrs local_if, union socketAddress* remote) {
|
||||
|
||||
static int findInterfaceMatchSubnet(char* ifNames, union socketAddress* localAddrs, union socketAddress* remoteAddr, int ifNameMaxSize, int maxIfs) {
|
||||
#ifdef ENABLE_TRACE
|
||||
char line[1024];
|
||||
char line[SOCKET_NAME_MAXLEN+1];
|
||||
#endif
|
||||
char line_a[1024];
|
||||
char line_a[SOCKET_NAME_MAXLEN+1];
|
||||
int found = 0;
|
||||
struct ifaddrs *interfaces, *interface;
|
||||
getifaddrs(&interfaces);
|
||||
@@ -355,7 +356,7 @@ static ncclResult_t createListenSocket(int *fd, union socketAddress *localAddr)
|
||||
SYSCHECK(getsockname(sockfd, &localAddr->sa, &size), "getsockname");
|
||||
|
||||
#ifdef ENABLE_TRACE
|
||||
char line[1024];
|
||||
char line[SOCKET_NAME_MAXLEN+1];
|
||||
TRACE(NCCL_INIT|NCCL_NET,"Listening on socket %s", socketToString(&localAddr->sa, line));
|
||||
#endif
|
||||
|
||||
@@ -370,6 +371,10 @@ static ncclResult_t createListenSocket(int *fd, union socketAddress *localAddr)
|
||||
static ncclResult_t connectAddress(int* fd, union socketAddress* remoteAddr) {
|
||||
/* IPv4/IPv6 support */
|
||||
int family = remoteAddr->sa.sa_family;
|
||||
if (family != AF_INET && family != AF_INET6) {
|
||||
WARN("Error : connecting to address with family %d is neither AF_INET(%d) nor AF_INET6(%d)\n", family, AF_INET, AF_INET6);
|
||||
return ncclInternalError;
|
||||
}
|
||||
int salen = (family == AF_INET) ? sizeof(sockaddr_in) : sizeof(sockaddr_in6);
|
||||
|
||||
/* Connect to a hostname / port */
|
||||
@@ -386,10 +391,8 @@ static ncclResult_t connectAddress(int* fd, union socketAddress* remoteAddr) {
|
||||
SYSCHECK(setsockopt(*fd, SOL_SOCKET, SO_SNDBUF, (char*)&bufsize, sizeof(int)), "setsockopt");
|
||||
SYSCHECK(setsockopt(*fd, SOL_SOCKET, SO_RCVBUF, (char*)&bufsize, sizeof(int)), "setsockopt");*/
|
||||
|
||||
char line[1024];
|
||||
#ifdef ENABLE_TRACE
|
||||
char line[SOCKET_NAME_MAXLEN+1];
|
||||
TRACE(NCCL_INIT|NCCL_NET,"Connecting to socket %s", socketToString(&remoteAddr->sa, line));
|
||||
#endif
|
||||
|
||||
int ret;
|
||||
int timedout_retries = 0;
|
||||
@@ -450,7 +453,7 @@ static ncclResult_t socketSend(int fd, void* ptr, int size) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
static ncclResult_t socketReceive(int fd, void* ptr, int size) {
|
||||
static ncclResult_t socketRecv(int fd, void* ptr, int size) {
|
||||
int offset = 0;
|
||||
NCCLCHECK(socketWait(NCCL_SOCKET_RECV, fd, ptr, size, &offset));
|
||||
return ncclSuccess;
|
||||
|
||||
@@ -41,8 +41,8 @@ struct ncclConnect {
|
||||
};
|
||||
|
||||
struct ncclTransportComm {
|
||||
ncclResult_t (*setup)(struct ncclTopoSystem* topo, struct ncclTopoGraph* graph, struct ncclPeerInfo*, struct ncclPeerInfo*, struct ncclConnect*, struct ncclConnector*, int channelId);
|
||||
ncclResult_t (*connect)(struct ncclConnect*, int nranks, int rank, struct ncclConnector*);
|
||||
ncclResult_t (*setup)(struct ncclComm* comm, struct ncclTopoGraph* graph, struct ncclPeerInfo*, struct ncclPeerInfo*, struct ncclConnect*, struct ncclConnector*, int channelId);
|
||||
ncclResult_t (*connect)(struct ncclComm* comm, struct ncclConnect*, int nranks, int rank, struct ncclConnector*);
|
||||
ncclResult_t (*free)(void*);
|
||||
ncclResult_t (*proxy)(struct ncclProxyArgs*);
|
||||
};
|
||||
@@ -54,6 +54,7 @@ struct ncclTransport {
|
||||
struct ncclTransportComm recv;
|
||||
};
|
||||
|
||||
ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend);
|
||||
ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend);
|
||||
ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
@@ -7,7 +7,7 @@
|
||||
#ifndef NCCL_TREES_H_
|
||||
#define NCCL_TREES_H_
|
||||
|
||||
ncclResult_t ncclGetBtree(int nranks, int rank, int* u0, int* d1, int* d0);
|
||||
ncclResult_t ncclGetDtree(int nranks, int rank, int* u0, int* d0_0, int* d0_1, int* u1, int* d1_0, int* d1_1);
|
||||
ncclResult_t ncclGetBtree(int nranks, int rank, int* u0, int* d1, int* d0, int* parentChildType);
|
||||
ncclResult_t ncclGetDtree(int nranks, int rank, int* u0, int* d0_0, int* d0_1, int* parentChildType0, int* u1, int* d1_0, int* d1_1, int* parentChildType1);
|
||||
|
||||
#endif
|
||||
|
||||
Verwijs in nieuw issue
Block a user