Added new gpu targets (#631)

This commit is contained in:
akolliasAMD
2022-09-29 14:53:55 -06:00
gecommit door GitHub
bovenliggende a523b37ac7
commit ef71550738
3 gewijzigde bestanden met toevoegingen van 71 en 11 verwijderingen
+2 -2
Bestand weergeven
@@ -34,12 +34,12 @@ set(CMAKE_INSTALL_PREFIX "${ROCM_PATH}" CACHE PATH "")
#Set the AMDGPU_TARGETS with backward compatiblity
if(COMMAND rocm_check_target_ids)
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx1030"
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx1030;gfx1100;gfx1101;gfx1102"
)
else()
# Use target ID syntax if supported for AMDGPU_TARGETS
if(TARGET_ID_SUPPORT)
set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030")
set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030;gfx1100;gfx1101;gfx1102")
else()
set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908")
endif()
+68 -8
Bestand weergeven
@@ -330,13 +330,13 @@ class ncclFunction {
};
#ifdef ENABLE_COLLTRACE
#define traceColl(launch_type) { \
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
#define traceColl(launch_type) { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = __builtin_amdgcn_s_memrealtime(); \
collTrace->timeStamp = wall_clock64(); \
collTrace->bid = blockIdx.x; \
collTrace->funcIndex = ncclShmem.work.header.funcIndex; \
asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s" (collTrace->data_0)); \
if (ncclShmem.work.header.type == ncclWorkTypeP2p) { \
struct ncclWorkElemP2p *p2pElems = ncclShmem.work.p2pElems; \
collTrace->p2p[0].connIndex = 0; \
@@ -362,25 +362,84 @@ class ncclFunction {
} \
}
#define traceKernelLaunch(firstLaunch) { \
#define traceKernelLaunch(firstLaunch) { \
traceColl(firstLaunch?ncclCollTraceKernelLaunchType:ncclCollTraceCollLaunchType); \
}
#define traceKernelEnd() { \
#define traceKernelEnd() { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = wall_clock64(); \
collTrace->bid = blockIdx.x; \
collTrace->type = ncclCollTraceKernelEndType; \
}
#define traceAbort() { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = wall_clock64(); \
collTrace->bid = blockIdx.x; \
collTrace->type = ncclCollTraceAbortType; \
}
#define traceData(data2, data4, data8_0, data8_1) { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->bid = blockIdx.x; \
collTrace->timeStamp = wall_clock64(); \
collTrace->funcIndex = data2; \
collTrace->data_0 = data4; \
collTrace->opCount = data8_0; \
collTrace->data_1 = data8_1; \
collTrace->type = ncclCollTraceDataType; \
}
#else
#define traceColl(launch_type) { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = __builtin_amdgcn_s_memrealtime(); \
collTrace->bid = blockIdx.x; \
collTrace->funcIndex = ncclShmem.work.header.funcIndex; \
asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s" (collTrace->data_0)); \
if (ncclShmem.work.header.type == ncclWorkTypeP2p) { \
struct ncclWorkElemP2p *p2pElems = ncclShmem.work.p2pElems; \
collTrace->p2p[0].connIndex = 0; \
collTrace->p2pOpCount[0] = p2pElems[0].opCount; \
collTrace->p2p[0].ngroups = p2pElems[0].ngroups; \
collTrace->p2p[0].nWarps = p2pElems[0].nWarps; \
collTrace->p2p[0].warpStart = p2pElems[0].warpStart; \
collTrace->p2p[0].peer = p2pElems[0].p2pType == ncclWorkP2pTypeRecv ? (uint16_t)(p2pElems[0].peer) : -1; \
collTrace->p2p[1].connIndex = 0; \
collTrace->p2pOpCount[1] = p2pElems[1].opCount; \
collTrace->p2p[1].ngroups = p2pElems[1].ngroups; \
collTrace->p2p[1].nWarps = p2pElems[1].nWarps; \
collTrace->p2p[1].warpStart = p2pElems[1].warpStart; \
collTrace->p2p[1].peer = p2pElems[1].p2pType == ncclWorkP2pTypeSend ? (uint16_t)(p2pElems[1].peer) : -1; \
collTrace->type = (launch_type) | ncclCollTraceP2pElemType; \
} else if (ncclShmem.work.header.type == ncclWorkTypeColl) { \
struct ncclWorkElem *elems = ncclShmem.work.elems; \
collTrace->opCount = elems[0].opCount; \
collTrace->coll.nWarps = elems[0].nWarps; \
collTrace->coll.bid = elems[0].bid; \
collTrace->coll.nChannels = elems[0].nChannels; \
collTrace->type = (launch_type) | ncclCollTraceCollElemType; \
} \
}
#define traceKernelLaunch(firstLaunch) { \
traceColl(firstLaunch?ncclCollTraceKernelLaunchType:ncclCollTraceCollLaunchType); \
}
#define traceKernelEnd() { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = __builtin_amdgcn_s_memrealtime(); \
collTrace->bid = blockIdx.x; \
collTrace->type = ncclCollTraceKernelEndType; \
}
#define traceAbort() { \
#define traceAbort() { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = __builtin_amdgcn_s_memrealtime(); \
collTrace->bid = blockIdx.x; \
collTrace->type = ncclCollTraceAbortType; \
}
// traceData(int16_t data2, uint32_t data4, uint64_t data8_0, uint64_t data8_1)
#define traceData(data2, data4, data8_0, data8_1) { \
#define traceData(data2, data4, data8_0, data8_1) { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->bid = blockIdx.x; \
@@ -391,6 +450,7 @@ class ncclFunction {
collTrace->data_1 = data8_1; \
collTrace->type = ncclCollTraceDataType; \
}
#endif
#else
#define traceData(data2, data4, data8_0, data8_1)
#endif
+1 -1
Bestand weergeven
@@ -297,7 +297,7 @@ private:
}
}
#if !defined(__gfx1030__)
#if !defined(__gfx1030__) && !defined(__gfx1100__) && !defined(__gfx1101__) && !defined(__gfx1102__)
if (tid == 0) __asm__ __volatile__("buffer_wbinvl1_vol");
#endif
/************************ Send **************************/