From 4b5bf9f2276195c14521b0393a1aa3b41e2bec03 Mon Sep 17 00:00:00 2001 From: Michael LIAO Date: Wed, 31 Jul 2019 14:59:03 -0400 Subject: [PATCH] Fix build with hip-clang Two minor issues are solved: + Enclose the kernel function in parentheses, as hip-clang defines `hipLaunchKernelGGL` as a macro. + Explicitly add the missing include for hip-clang. [ROCm/rccl commit: 41310144f65edaef4834464ba3af3b3510065ecd] --- projects/rccl/CMakeLists.txt | 1 + projects/rccl/src/misc/enqueue.cu | 4 ++-- projects/rccl/src/transport.cu | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/projects/rccl/CMakeLists.txt b/projects/rccl/CMakeLists.txt index 4c0aedf076..39a0cbcc94 100644 --- a/projects/rccl/CMakeLists.txt +++ b/projects/rccl/CMakeLists.txt @@ -146,6 +146,7 @@ if("${HIP_COMPILER}" MATCHES "clang") PRIVATE --amdgpu-target=gfx908 PRIVATE -fgpu-rdc) target_link_libraries(rccl PRIVATE -fgpu-rdc) + target_include_directories(rccl PRIVATE /opt/rocm/include) endif() if("${HIP_COMPILER}" MATCHES "hcc") diff --git a/projects/rccl/src/misc/enqueue.cu b/projects/rccl/src/misc/enqueue.cu index 8be35612dc..eb56de55ae 100644 --- a/projects/rccl/src/misc/enqueue.cu +++ b/projects/rccl/src/misc/enqueue.cu @@ -71,7 +71,7 @@ ncclResult_t ncclLaunchCooperativeKernelMultiDevice(hipLaunchParams *paramsList, for (int i = 0; i < numDevices; i++) { hipLaunchParams* params = paramsList+i; CUDACHECK(hipSetDevice(cudaDevs[i])); - hipLaunchKernelGGL((void (*)(struct ncclColl))params->func, params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args))); + hipLaunchKernelGGL(((void (*)(struct ncclColl))params->func), params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args))); } CUDACHECK(hipSetDevice(savedDev)); return ncclSuccess; @@ -185,7 +185,7 @@ ncclResult_t ncclBarrierEnqueueWait(ncclComm_t comm) { hipLaunchParams *params = comm->myParams; if (comm->launchMode == ncclComm::PARALLEL) { - 
hipLaunchKernelGGL((void (*)(struct ncclColl))params->func, params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args))); + hipLaunchKernelGGL(((void (*)(struct ncclColl))params->func), params->gridDim, params->blockDim, params->sharedMem, params->stream, **((struct ncclColl **)(params->args))); } // Start the network proxies as soon as the kernel has been launched. We can't // perform any CUDA call between the two or having a hipFree between the CUDA diff --git a/projects/rccl/src/transport.cu b/projects/rccl/src/transport.cu index a910bbbd8d..7d83ff4488 100644 --- a/projects/rccl/src/transport.cu +++ b/projects/rccl/src/transport.cu @@ -7,6 +7,7 @@ #include "core.h" #include "common_coll.h" +#include extern struct ncclTransport p2pTransport; extern struct ncclTransport shmTransport;