Fix build with hip-clang
Two minor issues are fixed:
+ Enclose the kernel function expression in parentheses, because hip-clang defines `hipLaunchKernelGGL` as a macro.
+ Explicitly include <hsa.h>, which is needed when building with hip-clang.
Tento commit je obsažen v:
@@ -146,6 +146,7 @@ if("${HIP_COMPILER}" MATCHES "clang")
|
||||
PRIVATE --amdgpu-target=gfx908
|
||||
PRIVATE -fgpu-rdc)
|
||||
target_link_libraries(rccl PRIVATE -fgpu-rdc)
|
||||
target_include_directories(rccl PRIVATE /opt/rocm/include)
|
||||
endif()
|
||||
|
||||
if("${HIP_COMPILER}" MATCHES "hcc")
|
||||
|
||||
@@ -71,7 +71,7 @@ ncclResult_t ncclLaunchCooperativeKernelMultiDevice(hipLaunchParams *paramsList,
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
hipLaunchParams* params = paramsList+i;
|
||||
CUDACHECK(hipSetDevice(cudaDevs[i]));
|
||||
hipLaunchKernelGGL((void (*)(struct ncclColl))params->func, params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args)));
|
||||
hipLaunchKernelGGL(((void (*)(struct ncclColl))params->func), params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args)));
|
||||
}
|
||||
CUDACHECK(hipSetDevice(savedDev));
|
||||
return ncclSuccess;
|
||||
@@ -185,7 +185,7 @@ ncclResult_t ncclBarrierEnqueueWait(ncclComm_t comm) {
|
||||
|
||||
hipLaunchParams *params = comm->myParams;
|
||||
if (comm->launchMode == ncclComm::PARALLEL) {
|
||||
hipLaunchKernelGGL((void (*)(struct ncclColl))params->func, params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args)));
|
||||
hipLaunchKernelGGL(((void (*)(struct ncclColl))params->func), params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args)));
|
||||
}
|
||||
// Start the network proxies as soon as the kernel has been launched. We can't
|
||||
// perform any CUDA call between the two or having a hipFree between the CUDA
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "core.h"
|
||||
#include "common_coll.h"
|
||||
#include <hsa.h>
|
||||
|
||||
extern struct ncclTransport p2pTransport;
|
||||
extern struct ncclTransport shmTransport;
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele