From 303ba600d6f227d684865d3138c9ede24a620b2e Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Thu, 16 May 2019 20:36:51 +0000 Subject: [PATCH 01/24] Fix hipcc exit code when failing --- hipamd/bin/hipcc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index fadc74c137..012545e5d4 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -848,7 +848,21 @@ if ($runCmd) { print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using $HCC_HOME/hcc with version $HCC_VERSION from hipcc. Please rebuild HIP including cmake or update HCC_HOME variable.\n") ; die unless $ENV{'HIP_IGNORE_HCC_VERSION'}; } - system ("$CMD") or delete_temp_dirs () and die (); + system ("$CMD"); + if ($? == -1) { + print "failed to execute: $!\n"; + exit($?); + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + exit($?); + } + else { + $CMD_EXIT_CODE = $? >> 8; + } + $? or delete_temp_dirs (); + exit($CMD_EXIT_CODE); } # vim: ts=4:sw=4:expandtab:smartindent From adf3b8774e04f02239aedc18d541803f653c9853 Mon Sep 17 00:00:00 2001 From: Konstantin Pyzhov Date: Mon, 20 May 2019 02:13:30 -0400 Subject: [PATCH 02/24] Disable hipStreamSync2 test for hip-clang. --- hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp b/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp index c365dce70a..8160ac4bc3 100644 --- a/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp +++ b/hipamd/tests/src/runtimeApi/stream/hipStreamSync2.cpp @@ -178,7 +178,9 @@ void runTests(int64_t numElements) { { test(0x01, C_d, C_h, numElements, syncNone, true /*expectMismatch*/); test(0x02, C_d, C_h, numElements, syncNullStream, false /*expectMismatch*/); +#ifndef __HIP_CLANG_ONLY__ test(0x04, C_d, C_h, numElements, syncOtherStream, true /*expectMismatch*/); +#endif test(0x08, C_d, C_h, numElements, syncDevice, false /*expectMismatch*/); // Sending a marker to to null stream may synchronize the otherStream From d2125f032520399453356788c7e721bf0958108f Mon Sep 17 00:00:00 2001 From: Konstantin Pyzhov Date: Tue, 21 May 2019 13:21:48 -0400 Subject: [PATCH 03/24] Updated kernel binary file for hipModule test. --- .../src/runtimeApi/module/vcpy_kernel.code | Bin 9456 -> 9456 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/module/vcpy_kernel.code b/hipamd/tests/src/runtimeApi/module/vcpy_kernel.code index 737ee1ce9b3eedb45e5cf76377c851dccf2aca11..4246151be15f18f6ca4c6924779f4d6b05680982 100755 GIT binary patch delta 186 zcmez1`N4C-46b=>3}7&iZR4UkM$QIi2)ALfCQ~!hU#7|HnBHvWWASEWGBleU&H8eZ z0Lx|{wk@2K4cIFt33xCHO!gGm&B(BsRgjsVX$R|ME(JNpjLDh`@74nb4CD+ CI4BDM delta 210 zcmez1`N4C-3@$Nt1~3p~-?*rbk@E{Pg!^T(CQ~z0A@k&QOm7%XCeLP;0+InNQtVEZ zVTn1Vsgv(8OEG&I8*Of2PGw{=GMW6G<>kZ)ESoQ|?% Date: Wed, 22 May 2019 18:31:39 +0300 Subject: [PATCH 04/24] [HIP][HIPIFY] Make hipMemcpyParam2D coherent with cuMemcpy2D + Makes hip_Memcpy2D struct compatible with CUDA_MEMCPY2D struct + Add hipMemcpyParam2D support in nvcc fallback path + Update hipify-clang, tests and docs accordingly --- ...A_Driver_API_functions_supported_by_HIP.md | 2 +- .../src/CUDA2HIP_Driver_API_functions.cpp | 4 +-- hipamd/include/hip/hcc_detail/driver_types.h | 28 +++++++++---------- .../include/hip/hcc_detail/hip_runtime_api.h | 10 +++++++ .../include/hip/nvcc_detail/hip_runtime_api.h | 12 ++++++++ .../11_texture_driver/texture2dDrv.cpp | 4 +-- hipamd/src/hip_memory.cpp | 4 +-- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index aeb834c2ec..7c2d2c6631 100644 --- a/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -861,7 +861,7 @@ | `cuMemAllocManaged` | | | `cuMemAllocPitch` | | | `cuMemcpy` | | -| `cuMemcpy2D` | | +| `cuMemcpy2D` | `hipMemcpyParam2D` | | `cuMemcpy2DAsync` | | | `cuMemcpy2DUnaligned` | | | `cuMemcpy3D` | | diff --git a/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index b71e49710d..8be20774ea 100644 --- a/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/hipamd/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -175,8 +175,8 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ {"cuMemcpy", {"hipMemcpy_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // no analogue // NOTE: Not equal to cudaMemcpy2D due to different signatures - {"cuMemcpy2D", {"hipMemcpy2D_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy2D_v2", {"hipMemcpy2D_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2D", {"hipMemcpyParam2D", "", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpy2D_v2", {"hipMemcpyParam2D", "", CONV_MEMORY, API_DRIVER}}, // no analogue // NOTE: Not equal to cudaMemcpy2DAsync due to different signatures {"cuMemcpy2DAsync", {"hipMemcpy2DAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, diff --git a/hipamd/include/hip/hcc_detail/driver_types.h b/hipamd/include/hip/hcc_detail/driver_types.h index 8e1fec11fa..5b2297114f 100644 --- a/hipamd/include/hip/hcc_detail/driver_types.h +++ b/hipamd/include/hip/hcc_detail/driver_types.h @@ -80,22 +80,22 @@ typedef struct hipArray { }hipArray; typedef struct hip_Memcpy2D { - size_t height; - size_t widthInBytes; - hipArray* dstArray; - hipDeviceptr_t dstDevice; - void* dstHost; - hipMemoryType dstMemoryType; - size_t dstPitch; - size_t dstXInBytes; - size_t dstY; - hipArray* srcArray; - hipDeviceptr_t srcDevice; - const void* srcHost; - hipMemoryType srcMemoryType; - size_t srcPitch; size_t srcXInBytes; size_t srcY; + hipMemoryType srcMemoryType; + const void* srcHost; + hipDeviceptr_t srcDevice; + hipArray* srcArray; + size_t srcPitch; + size_t dstXInBytes; + size_t dstY; + hipMemoryType dstMemoryType; + void* dstHost; + hipDeviceptr_t dstDevice; + hipArray* dstArray; + size_t dstPitch; + size_t WidthInBytes; + size_t Height; } hip_Memcpy2D; diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 5b598b54a8..d870963101 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -1858,6 +1858,16 @@ hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* */ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind); + +/** +* @brief Copies memory for 2D arrays. +* @param[in] pCopy Parameters for the memory copy + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidPitchValue, + * #hipErrorInvalidDevicePointer, #hipErrorInvalidMemcpyDirection + * + * @see hipMemcpy, hipMemcpy2D, hipMemcpyToArray, hipMemcpy2DToArray, hipMemcpyFromArray, + * hipMemcpyToSymbol, hipMemcpyAsync +*/ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy); /** diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 2ebd4f8b7d..08897fdb07 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -145,6 +145,12 @@ typedef enum hipChannelFormatKind { #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS +// enum CUmemorytype redefines +#define hipMemoryTypeHost CU_MEMORYTYPE_HOST +#define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE +#define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY +#define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED + typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; typedef cudaIpcEventHandle_t hipIpcEventHandle_t; @@ -162,6 +168,8 @@ typedef CUdeviceptr hipDeviceptr_t; typedef struct cudaArray hipArray; typedef struct cudaArray* hipArray_const_t; typedef cudaFuncAttributes hipFuncAttributes; +typedef enum CUmemorytype hipMemoryType; +typedef struct CUDA_MEMCPY2D hip_Memcpy2D; #define hipMemcpy3DParms cudaMemcpy3DParms #define hipArrayDefault cudaArrayDefault #define hipArrayLayered cudaArrayLayered @@ -578,6 +586,10 @@ inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, cudaMemcpy2D(dst, dpitch, src, spitch, width, height, hipMemcpyKindToCudaMemcpyKind(kind))); } +inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + return hipCUDAErrorTohipError(cuMemcpy2D(hip_Memcpy2D)); +} + inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) { return hipCUDAErrorTohipError(cudaMemcpy3D(p)); diff --git a/hipamd/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp b/hipamd/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp index a6ba44696a..36d37a4fad 100644 --- a/hipamd/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp +++ b/hipamd/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp @@ -71,8 +71,8 @@ bool runTest(int argc, char** argv) { copyParam.srcMemoryType = hipMemoryTypeHost; copyParam.srcHost = hData; copyParam.srcPitch = width * sizeof(float); - copyParam.widthInBytes = copyParam.srcPitch; - copyParam.height = height; + copyParam.WidthInBytes = copyParam.srcPitch; + copyParam.Height = height; hipMemcpyParam2D(©Param); textureReference* texref; diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 0fad8ab890..36edcdb338 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -1715,8 +1715,8 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { if (pCopy == nullptr) { e = hipErrorInvalidValue; } - e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->widthInBytes, pCopy->srcHost, pCopy->srcPitch, - pCopy->widthInBytes, pCopy->height, hipMemcpyDefault); + e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->WidthInBytes, pCopy->srcHost, pCopy->srcPitch, + pCopy->WidthInBytes, pCopy->Height, hipMemcpyDefault); return ihipLogStatus(e); } From 6806ab6745374d83a6f630f84fcc2e9d0d88c580 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 22 May 2019 20:04:45 +0300 Subject: [PATCH 05/24] [HIP] fix nvcc path break in #1127 --- hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 08897fdb07..7bc7b91368 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -145,12 +145,6 @@ typedef enum hipChannelFormatKind { #define hipJitOptionFastCompile CU_JIT_FAST_COMPILE #define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS -// enum CUmemorytype redefines -#define hipMemoryTypeHost CU_MEMORYTYPE_HOST -#define hipMemoryTypeDevice CU_MEMORYTYPE_DEVICE -#define hipMemoryTypeArray CU_MEMORYTYPE_ARRAY -#define hipMemoryTypeUnified CU_MEMORYTYPE_UNIFIED - typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; typedef cudaIpcEventHandle_t hipIpcEventHandle_t; @@ -168,8 +162,7 @@ typedef CUdeviceptr hipDeviceptr_t; typedef struct cudaArray hipArray; typedef struct cudaArray* hipArray_const_t; typedef cudaFuncAttributes hipFuncAttributes; -typedef enum CUmemorytype hipMemoryType; -typedef struct CUDA_MEMCPY2D hip_Memcpy2D; +#define hip_Memcpy2D CUDA_MEMCPY2D #define hipMemcpy3DParms cudaMemcpy3DParms #define hipArrayDefault cudaArrayDefault #define hipArrayLayered cudaArrayLayered @@ -587,7 +580,7 @@ inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, } inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { - return hipCUDAErrorTohipError(cuMemcpy2D(hip_Memcpy2D)); + return hipCUResultTohipError(cuMemcpy2D(hip_Memcpy2D)); } inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) From 49b9df7a9e78dc72ad97148ce1b3aa20c569490f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 22 May 2019 20:48:18 +0300 Subject: [PATCH 06/24] [HIP] fix typo in #1127 --- hipamd/include/hip/nvcc_detail/hip_runtime_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 7bc7b91368..c1846c1b1e 100644 --- a/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -580,7 +580,7 @@ inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, } inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { - return hipCUResultTohipError(cuMemcpy2D(hip_Memcpy2D)); + return hipCUResultTohipError(cuMemcpy2D(pCopy)); } inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) From 2b11a8bf0c865d9b144494173425955465a348a6 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Wed, 22 May 2019 19:20:09 +0000 Subject: [PATCH 07/24] Fix bug in __smid not setting correct size The SZ field should minus by 1 since SIZE range is 1..32. Also add comments that results may vary. --- hipamd/include/hip/hcc_detail/device_functions.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/device_functions.h b/hipamd/include/hip/hcc_detail/device_functions.h index e1f81e4547..808ed216e4 100644 --- a/hipamd/include/hip/hcc_detail/device_functions.h +++ b/hipamd/include/hip/hcc_detail/device_functions.h @@ -1009,14 +1009,20 @@ void __syncthreads() #define GETREG_IMMED(SZ,OFF,REG) (SZ << 11) | (OFF << 6) | REG +/* + __smid returns the wave's assigned Compute Unit and Shader Engine. + The Compute Unit, CU_ID returned in bits 3:0, and Shader Engine, SE_ID in bits 5:4. + Note: the results vary over time. + SZ minus 1 since SIZE is 1-based. +*/ __device__ inline unsigned __smid(void) { unsigned cu_id = __builtin_amdgcn_s_getreg( - GETREG_IMMED(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID)); + GETREG_IMMED(HW_ID_CU_ID_SIZE-1, HW_ID_CU_ID_OFFSET, HW_ID)); unsigned se_id = __builtin_amdgcn_s_getreg( - GETREG_IMMED(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID)); + GETREG_IMMED(HW_ID_SE_ID_SIZE-1, HW_ID_SE_ID_OFFSET, HW_ID)); /* Each shader engine has 16 CU */ return (se_id << HW_ID_CU_ID_SIZE) + cu_id; From e481012f43429be3faa699050cf52cae88d96acb Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Wed, 22 May 2019 12:19:51 -0700 Subject: [PATCH 08/24] Update kernel language documentation --- hipamd/docs/markdown/hip_kernel_language.md | 67 +++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/hipamd/docs/markdown/hip_kernel_language.md b/hipamd/docs/markdown/hip_kernel_language.md index d69f5a04a8..5479813675 100644 --- a/hipamd/docs/markdown/hip_kernel_language.md +++ b/hipamd/docs/markdown/hip_kernel_language.md @@ -35,6 +35,9 @@ - [Warp Cross-Lane Functions](#warp-cross-lane-functions) * [Warp Vote and Ballot Functions](#warp-vote-and-ballot-functions) * [Warp Shuffle Functions](#warp-shuffle-functions) +- [Cooperative Groups Functions](#cooperative-groups-functions) +- [Warp Matrix Functions](#warp-matrix-functions) +- [Independent Thread Scheduling](#independent-thread-scheduling) - [Profiler Counter Function](#profiler-counter-function) - [Assert](#assert) - [Printf](#printf) @@ -599,6 +602,70 @@ float __shfl_xor (float var, int laneMask, int width=warpSize); ``` +## Cooperative Groups Functions + +Cooperative groups is a mechanism for forming and communicating between groups of threads at +a granularity different than the block. This feature was introduced in Cuda 9. + +HIP does not support any of the kernel language cooperative groups +types or functions. + + +| **Function** | **Supported in HIP** | **Supported in CUDA** | +| --- | --- | --- | +| `void thread_group.sync()` | | ✓ | +| `unsigned thread_group.size()` | | ✓ | +| `unsigned thread_group.thread_rank()` | | ✓ | +| `bool thread_group.is_valid()` | | ✓ | +| `thread_group tiled_partition(thread_group, size)` | | ✓ | +| `thread_block_tile tiled_partition(thread_group)` | | ✓ | +| `thread_block this_thread_block()` | | ✓ | +| `T thread_block_tile.shfl()` | | ✓ | +| `T thread_block_tile.shfl_down()` | | ✓ | +| `T thread_block_tile.shfl_up()` | | ✓ | +| `T thread_block_tile.shfl_xor()` | | ✓ | +| `T thread_block_tile.any()` | | ✓ | +| `T thread_block_tile.all()` | | ✓ | +| `T thread_block_tile.ballot()` | | ✓ | +| `T thread_block_tile.match_any()` | | ✓ | +| `T thread_block_tile.match_all()` | | ✓ | +| `coalesced_group coalesced_threads()` | | ✓ | +| `grid_group this_grid()` | | ✓ | +| `void grid_group.sync()` | | ✓ | +| `unsigned grid_group.size()` | | ✓ | +| `unsigned grid_group.thread_rank()` | | ✓ | +| `bool grid_group.is_valid()` | | ✓ | +| `multi_grid_group this_multi_grid()` | | ✓ | +| `void multi_grid_group.sync()` | | ✓ | +| `unsigned multi_grid_group.size()` | | ✓ | +| `unsigned multi_grid_group.thread_rank()` | | ✓ | +| `bool multi_grid_group.is_valid()` | | ✓ | + +## Warp Matrix Functions + +Warp matrix functions allow a warp to cooperatively operate on small matrices +whose elements are spread over the lanes in an unspecified manner. This feature +was introduced in Cuda 9. + +HIP does not support any of the kernel language warp matrix +types or functions. + +| **Function** | **Supported in HIP** | **Supported in CUDA** | +| --- | --- | --- | +| `void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda)` | | ✓ | +| `void load_matrix_sync(fragment<...> &a, const T* mptr, unsigned lda, layout_t layout)` | | ✓ | +| `void store_matrix_sync(T* mptr, fragment<...> &a, unsigned lda, layout_t layout)` | | ✓ | +| `void fill_fragment(fragment<...> &a, const T &value)` | | ✓ | +| `void mma_sync(fragment<...> &d, const fragment<...> &a, const fragment<...> &b, const fragment<...> &c , bool sat)` | | ✓ | + +## Independent Thread Scheduling + +The hardware support for independent thread scheduling introduced in certain architectures +supporting Cuda allows threads to progress independently of each other and enables +intra-warp synchronizations that were previously not allowed. + +HIP does not support this type of scheduling. + ## Profiler Counter Function The Cuda `__prof_trigger()` instruction is not supported. From 7db992cf44c6b042d4f3035d0d0656af3fcf7d85 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 23 May 2019 12:47:08 +0300 Subject: [PATCH 09/24] [HIPIFY] Add device texture fetch functions support + Add a corresponding reverse engineered sample tex2dKernel with texture template --- hipamd/hipify-clang/src/CUDA2HIP.cpp | 1 + hipamd/hipify-clang/src/HipifyAction.cpp | 3 ++ .../11_texture_driver/tex2dKernel.cpp | 36 +++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 hipamd/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp diff --git a/hipamd/hipify-clang/src/CUDA2HIP.cpp b/hipamd/hipify-clang/src/CUDA2HIP.cpp index 9a26a051ca..1e530745e6 100644 --- a/hipamd/hipify-clang/src/CUDA2HIP.cpp +++ b/hipamd/hipify-clang/src/CUDA2HIP.cpp @@ -33,6 +33,7 @@ const std::map CUDA_INCLUDE_MAP{ {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME}}, {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME}}, {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME}}, + {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME}}, {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME}}, {"cuda_profiler_api.h", {"hip/hip_profile.h", "", CONV_INCLUDE, API_RUNTIME}}, // cuComplex includes diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index 5ea83c8376..241ca7ecae 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -189,6 +189,9 @@ bool HipifyAction::Exclude(const hipCounter & hipToken) { } return false; case CONV_INCLUDE: + if (hipToken.hipName.empty()) { + return true; + } switch (hipToken.apiType) { case API_RAND: if (hipToken.hipName == "hiprand_kernel.h") { diff --git a/hipamd/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp b/hipamd/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp new file mode 100644 index 0000000000..d5dffd0b09 --- /dev/null +++ b/hipamd/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp @@ -0,0 +1,36 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args +/* +Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// CHECK: #include +#include +// CHECK-NOT: #include +#include + +// CHECK: extern texture tex; +extern texture tex; + +extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { + int x = blockDim.x * blockIdx.x + threadIdx.x; + int y = blockDim.y * blockIdx.y + threadIdx.y; + outputData[y * width + x] = tex2D(tex, x, y); +} From 636057053269840e687aa5d5441d3b1d6eb0446a Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Thu, 23 May 2019 15:11:02 -0400 Subject: [PATCH 10/24] Fix device lib path for hip-clang. We now always need device lib path when compiling and not need it at linking. --- hipamd/bin/hipcc | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index fadc74c137..e46fe5604f 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -370,7 +370,6 @@ if($HIP_PLATFORM eq "nvcc"){ my $toolArgs = ""; # arguments to pass to the hcc or nvcc tool my $optArg = ""; # -O args -my $rdc = 0; foreach $arg (@ARGV) { @@ -466,14 +465,6 @@ foreach $arg (@ARGV) { $optArg = $arg; } - if($arg =~ /-fgpu-rdc/) - { - $rdc = 1; - } - if($arg =~ /-fno-gpu-rdc/) - { - $rdc = 0; - } ## process linker response file for hip-clang ## extract object files from static library and pass them directly to @@ -804,11 +795,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPLDFLAGS .= " -O3"; } $HIP_DEVLIB_FLAGS = " --hip-device-lib-path=$DEVICE_LIB_PATH"; - if ($rdc eq 0) { - $HIPCXXFLAGS .= " $HIP_DEVLIB_FLAGS"; - } else { - $HIPLDFLAGS .= " $HIP_DEVLIB_FLAGS"; - } + $HIPCXXFLAGS .= " $HIP_DEVLIB_FLAGS"; if ($isWindows) { $HIPCXXFLAGS .= " -std=c++14 -fms-extensions -fms-compatibility"; } else { From de891025285aac08815d7b454685e6dd4f937095 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Thu, 23 May 2019 18:03:32 -0700 Subject: [PATCH 11/24] Add support for code object v3 Use the code object manager library to parse the code object metadata. Both code object v2 and v3 formats are now supported for HCC generated binaries. --- hipamd/CMakeLists.txt | 15 +++ hipamd/src/hip_module.cpp | 82 +++++++++--- hipamd/src/program_state.inl | 236 +++++++++++++++++++++++------------ 3 files changed, 237 insertions(+), 96 deletions(-) diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index ba855ad86b..8701fe5635 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -271,6 +271,21 @@ if(HIP_PLATFORM STREQUAL "hcc") target_link_libraries(hiprtc PUBLIC stdc++fs) endif() + + if(HIP_PLATFORM STREQUAL "hcc") + find_package(amd_comgr REQUIRED CONFIG + PATHS + /opt/rocm/ + PATH_SUFFIXES + cmake/amd_comgr + lib/cmake/amd_comgr + ) + MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.") + endif() + + target_link_libraries(hip_hcc PRIVATE amd_comgr) + target_link_libraries(hip_hcc_static PRIVATE amd_comgr) + string(REPLACE " " ";" HCC_CXX_FLAGS_LIST ${HCC_CXX_FLAGS}) foreach(TARGET hip_hcc hip_hcc_static) target_include_directories(${TARGET} SYSTEM INTERFACE $/include>;${HSA_PATH}/include) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index e8a8801e98..ed8db42c86 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -55,6 +55,18 @@ THE SOFTWARE. using namespace ELFIO; using namespace std; +struct amd_kernel_code_v3_t { + uint32_t group_segment_fixed_size; + uint32_t private_segment_fixed_size; + uint8_t reserved0[8]; + int64_t kernel_code_entry_byte_offset; + uint8_t reserved1[24]; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint16_t kernel_code_properties; + uint8_t reserved2[6]; +}; + // calculate MD5 checksum inline std::string checksum(size_t size, const char *source) { // FNV-1a hashing, 64-bit version @@ -206,10 +218,20 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, aql.grid_size_x = globalWorkSizeX; aql.grid_size_y = globalWorkSizeY; aql.grid_size_z = globalWorkSizeZ; - aql.group_segment_size = - f->_header->workgroup_group_segment_byte_size + sharedMemBytes; - aql.private_segment_size = - f->_header->workitem_private_segment_byte_size; + bool is_code_object_v3 = f->_name.find(".kd") != std::string::npos; + if (is_code_object_v3) { + const auto* header = + reinterpret_cast(f->_header); + aql.group_segment_size = + header->group_segment_fixed_size + sharedMemBytes; + aql.private_segment_size = + header->private_segment_fixed_size; + } else { + aql.group_segment_size = + f->_header->workgroup_group_segment_byte_size + sharedMemBytes; + aql.private_segment_size = + f->_header->workitem_private_segment_byte_size; + } aql.kernel_object = f->_object; aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; aql.header = @@ -462,6 +484,12 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch auto kernel = find_kernel_by_name(hmod->executable, name, agent); + if (kernel.handle == 0u) { + std::string name_str(name); + name_str.append(".kd"); + kernel = find_kernel_by_name(hmod->executable, name_str.c_str(), agent); + } + if (kernel.handle == 0u) return hipErrorNotFound; // TODO: refactor the whole ihipThisThat, which is a mess and yields the @@ -486,7 +514,11 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, } namespace { -hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) { +const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) { + return reinterpret_cast(kd._header); +} + +hipFuncAttributes make_function_attributes(const ihipModuleSymbol_t& kd) { hipFuncAttributes r{}; hipDeviceProp_t prop{}; @@ -495,16 +527,31 @@ hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) { // available per CU, therefore we hardcode it to 64 KiRegisters. prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; - r.localSizeBytes = header.workitem_private_segment_byte_size; - r.sharedSizeBytes = header.workgroup_group_segment_byte_size; + bool is_code_object_v3 = kd._name.find(".kd") != std::string::npos; + if (is_code_object_v3) { + r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; + r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; + } else { + r.localSizeBytes = kd._header->workitem_private_segment_byte_size; + r.sharedSizeBytes = kd._header->workgroup_group_segment_byte_size; + } r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes; - r.numRegs = header.workitem_vgpr_count; + if (is_code_object_v3) { + r.numRegs = ((header_v3(kd)->compute_pgm_rsrc1 & 0x3F) + 1) << 2; + } else { + r.numRegs = kd._header->workitem_vgpr_count; + } r.maxThreadsPerBlock = r.numRegs ? std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : prop.maxThreadsPerBlock; - r.binaryVersion = - header.amd_machine_version_major * 10 + - header.amd_machine_version_minor; + if (is_code_object_v3) { + r.binaryVersion = 0; // FIXME: should it be the ISA version or code + // object format version? + } else { + r.binaryVersion = + kd._header->amd_machine_version_major * 10 + + kd._header->amd_machine_version_minor; + } r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. return r; @@ -520,11 +567,10 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) auto agent = this_agent(); auto kd = get_program_state().kernel_descriptor(reinterpret_cast(func), agent); - const auto header = kd->_header; - if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."}; + if (!kd->_header) throw runtime_error{"Ill-formed Kernel_descriptor."}; - *attr = make_function_attributes(*header); + *attr = make_function_attributes(*kd); return hipSuccess; } @@ -555,11 +601,9 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { (*module)->executable = get_program_state().load_executable( content.data(), content.size(), (*module)->executable, this_agent()); - istringstream elf{content}; - ELFIO::elfio reader; - if (reader.load(elf)) { - program_state_impl::read_kernarg_metadata(reader, (*module)->kernargs); - } + + std::vector blob(content.cbegin(), content.cend()); + program_state_impl::read_kernarg_metadata(blob, (*module)->kernargs); // compute the hash of the code object (*module)->hash = checksum(content.length(), content.data()); diff --git a/hipamd/src/program_state.inl b/hipamd/src/program_state.inl index f1397b3fe9..639eac9228 100644 --- a/hipamd/src/program_state.inl +++ b/hipamd/src/program_state.inl @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -540,9 +541,13 @@ public: std::call_once(functions[agent].first, [this](hsa_agent_t aa) { for (auto&& function : get_function_names()) { - const auto it = get_kernels(aa).find(function.second); + auto it = get_kernels(aa).find(function.second); - if (it == get_kernels(aa).cend()) continue; + if (it == get_kernels(aa).cend()) { + it = get_kernels(aa).find(function.second + ".kd"); + if (it == get_kernels(aa).cend()) + continue; + } for (auto&& kernel_symbol : it->second) { functions[aa].second.emplace( @@ -556,92 +561,172 @@ public: } static - std::size_t parse_args( - const std::string& metadata, - std::size_t f, - std::size_t l, + std::string metadata_to_string(const amd_comgr_metadata_node_t& md) { + std::string str; + size_t size; + + if (amd_comgr_get_metadata_string(md, &size, NULL) + == AMD_COMGR_STATUS_SUCCESS) { + str.resize(size - 1); + amd_comgr_get_metadata_string(md, &size, &str[0]); + } + return str; + } + + static + void parse_args( + const amd_comgr_metadata_node_t& args_md, + bool is_code_object_v3, std::vector>& size_align) { - if (f == l) return f; - if (!size_align.empty()) return l; + size_t arg_count = 0; + if (amd_comgr_get_metadata_list_size(args_md, &arg_count) + != AMD_COMGR_STATUS_SUCCESS) + return; - do { - static constexpr size_t size_sz{5}; - f = metadata.find("Size:", f) + size_sz; + for (size_t i = 0; i < arg_count; ++i) { + amd_comgr_metadata_node_t arg_md; - if (l <= f) return f; + if (amd_comgr_index_list_metadata(args_md, i, &arg_md) + != AMD_COMGR_STATUS_SUCCESS) + return; - auto size = std::strtoul(&metadata[f], nullptr, 10); + amd_comgr_metadata_node_t arg_size_md; + if (amd_comgr_metadata_lookup(arg_md, + is_code_object_v3 ? ".size" : "Size", + &arg_size_md) + != AMD_COMGR_STATUS_SUCCESS) + return; - static constexpr size_t align_sz{6}; - f = metadata.find("Align:", f) + align_sz; + size_t arg_size = std::stoul(metadata_to_string(arg_size_md)); - char* l{}; - auto align = std::strtoul(&metadata[f], &l, 10); + if (amd_comgr_destroy_metadata(arg_size_md) + != AMD_COMGR_STATUS_SUCCESS) + return; - f += (l - &metadata[f]) + 1; + size_t arg_align; - size_align.emplace_back(size, align); - } while (true); + if (is_code_object_v3) { + amd_comgr_metadata_node_t arg_offset_md; + if (amd_comgr_metadata_lookup(arg_md, ".offset", &arg_offset_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + size_t arg_offset + = std::stoul(metadata_to_string(arg_offset_md)); + + if (amd_comgr_destroy_metadata(arg_offset_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + arg_align = 1; + while (arg_offset && (arg_offset & 1) == 0) { + arg_offset >>= 1; + arg_align <<= 1; + } + } else { + amd_comgr_metadata_node_t arg_align_md; + if (amd_comgr_metadata_lookup(arg_md, "Align", &arg_align_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + + arg_align = std::stoul(metadata_to_string(arg_align_md)); + + if (amd_comgr_destroy_metadata(arg_align_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + } + + size_align.emplace_back(arg_size, arg_align); + + if (amd_comgr_destroy_metadata(arg_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + } } static void read_kernarg_metadata( - ELFIO::elfio& reader, + const std::vector& blob, std::unordered_map< std::string, std::vector>>& kernargs) { - // TODO: this is inefficient. - auto it = find_section_if(reader, [](const ELFIO::section* x) { - return x->get_type() == SHT_NOTE; - }); + amd_comgr_data_t dataIn; + amd_comgr_status_t status; - if (!it) return; + if (amd_comgr_create_data(AMD_COMGR_DATA_KIND_RELOCATABLE, &dataIn) + != AMD_COMGR_STATUS_SUCCESS) + return; - const ELFIO::note_section_accessor acc{reader, it}; - for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { - ELFIO::Elf_Word type{}; - std::string name{}; - void* desc{}; - ELFIO::Elf_Word desc_size{}; + if (amd_comgr_set_data(dataIn, blob.size(), blob.data()) + != AMD_COMGR_STATUS_SUCCESS) + return; - acc.get_note(i, type, name, desc, desc_size); + amd_comgr_metadata_node_t metadata; + if (amd_comgr_get_data_metadata(dataIn, &metadata) + != AMD_COMGR_STATUS_SUCCESS) + return; - if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. - - std::string tmp{ - static_cast(desc), static_cast(desc) + desc_size}; - - auto dx = tmp.find("Kernels:"); - - if (dx == std::string::npos) continue; - - static constexpr decltype(tmp.size()) kernels_sz{8}; - dx += kernels_sz; - - do { - dx = tmp.find("Name:", dx); - - if (dx == std::string::npos) break; - - static constexpr decltype(tmp.size()) name_sz{5}; - dx = tmp.find_first_not_of(" '", dx + name_sz); - - auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); - dx += fn.size(); - - auto dx1 = tmp.find("CodeProps", dx); - dx = tmp.find("Args:", dx); - - if (dx1 < dx) { - dx = dx1; - continue; - } - if (dx == std::string::npos) break; - - static constexpr decltype(tmp.size()) args_sz{5}; - dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]); - } while (true); + bool is_code_object_v3 = false; + amd_comgr_metadata_node_t kernels_md; + if (amd_comgr_metadata_lookup(metadata, "Kernels", &kernels_md) + != AMD_COMGR_STATUS_SUCCESS) { + if (amd_comgr_metadata_lookup(metadata, + "amdhsa.kernels", + &kernels_md) + != AMD_COMGR_STATUS_SUCCESS) + return; + is_code_object_v3 = true; } + + size_t kernel_count = 0; + if (amd_comgr_get_metadata_list_size(kernels_md, &kernel_count) + != AMD_COMGR_STATUS_SUCCESS) + return; + + for (size_t i = 0; i < kernel_count; i++) { + amd_comgr_metadata_node_t kernel_md; + + if (amd_comgr_index_list_metadata(kernels_md, i, &kernel_md) + != AMD_COMGR_STATUS_SUCCESS) + continue; + + amd_comgr_metadata_node_t name_md; + if (amd_comgr_metadata_lookup(kernel_md, + is_code_object_v3 ? ".name" : "Name", + &name_md) + != AMD_COMGR_STATUS_SUCCESS) + continue; + + std::string kernel_name_str = metadata_to_string(name_md); + + if (amd_comgr_destroy_metadata(name_md) + != AMD_COMGR_STATUS_SUCCESS) + continue; + + if (is_code_object_v3) + kernel_name_str.append(".kd"); + + + amd_comgr_metadata_node_t args_md; + if (amd_comgr_metadata_lookup(kernel_md, + is_code_object_v3 ? ".args" : "Args", + &args_md) + != AMD_COMGR_STATUS_SUCCESS) + continue; + + parse_args(args_md, is_code_object_v3, kernargs[kernel_name_str]); + + if (amd_comgr_destroy_metadata(args_md) != AMD_COMGR_STATUS_SUCCESS + || amd_comgr_destroy_metadata(kernel_md) + != AMD_COMGR_STATUS_SUCCESS) + continue; + } + + if (amd_comgr_destroy_metadata(kernels_md) != AMD_COMGR_STATUS_SUCCESS + || amd_comgr_destroy_metadata(metadata) != AMD_COMGR_STATUS_SUCCESS) + return; + + amd_comgr_release_data(dataIn); } const std::unordered_mapsecond); if (it1 == get_kernargs().end()) { - hip_throw(std::runtime_error{ - "Missing metadata for __global__ function: " + it->second}); + it1 = get_kernargs().find(it->second + ".kd"); + if (it1 == get_kernargs().end()) { + hip_throw(std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}); + } } return it1->second; From c1ac414f1476078ea448ca6d0872822ff256d515 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 24 May 2019 15:14:14 +0300 Subject: [PATCH 12/24] [HIPIFY][LLVMCompat] Support of upcoming LLVM 9.0.0 --- hipamd/hipify-clang/src/LLVMCompat.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hipamd/hipify-clang/src/LLVMCompat.cpp b/hipamd/hipify-clang/src/LLVMCompat.cpp index 87dedc630e..d2573ecf22 100644 --- a/hipamd/hipify-clang/src/LLVMCompat.cpp +++ b/hipamd/hipify-clang/src/LLVMCompat.cpp @@ -59,7 +59,11 @@ void EnterPreprocessorTokenStream(clang::Preprocessor& _pp, const clang::Token * #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) _pp.EnterTokenStream(start, len, false, DisableMacroExpansion); #else - _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion); + #if (LLVM_VERSION_MAJOR < 9) + _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion); + #else + _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion, false); + #endif #endif } From d34805f07a2cdb4fa0f8b1708c60e08667f240fb Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 24 May 2019 19:57:04 +0000 Subject: [PATCH 13/24] Workaround HIP-Clang missing libhiprtc.so packaging --- hipamd/packaging/hip_hcc.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hipamd/packaging/hip_hcc.txt b/hipamd/packaging/hip_hcc.txt index 5aebc6c36d..365af8ef1e 100644 --- a/hipamd/packaging/hip_hcc.txt +++ b/hipamd/packaging/hip_hcc.txt @@ -3,7 +3,9 @@ project(hip_hcc) install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib) -install(FILES @PROJECT_BINARY_DIR@/libhiprtc.so DESTINATION lib) +if(NOT @HIP_COMPILER@ STREQUAL "clang") + install(FILES @PROJECT_BINARY_DIR@/libhiprtc.so DESTINATION lib) +endif() install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) From b312ac8f61ec66dcea9e0385e39ba1e8232e8583 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 27 May 2019 21:44:53 +0300 Subject: [PATCH 14/24] [HIPIFY][DNN] Support of cuDNN 7.6.0 + Update docs and README.md accordingly --- .../markdown/CUDNN_API_supported_by_HIP.md | 113 ++++++++++++++++++ hipamd/hipify-clang/README.md | 16 +-- .../src/CUDA2HIP_DNN_API_functions.cpp | 22 ++++ .../src/CUDA2HIP_DNN_API_types.cpp | 94 ++++++++++++++- 4 files changed, 236 insertions(+), 9 deletions(-) diff --git a/hipamd/docs/markdown/CUDNN_API_supported_by_HIP.md b/hipamd/docs/markdown/CUDNN_API_supported_by_HIP.md index 2a3fa1a67a..3b59de4195 100644 --- a/hipamd/docs/markdown/CUDNN_API_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUDNN_API_supported_by_HIP.md @@ -246,6 +246,99 @@ | enum |***`cudnnWgradMode_t`*** | | | 0 |*`CUDNN_WGRAD_MODE_ADD`* | | | 1 |*`CUDNN_WGRAD_MODE_SET`* | | +| enum |***`cudnnReorderType_t`*** | | +| 0 |*`CUDNN_DEFAULT_REORDER`* | | +| 1 |*`CUDNN_NO_REORDER`* | | +| enum |***`cudnnLossNormalizationMode_t`*** | | +| 0 |*`CUDNN_LOSS_NORMALIZATION_NONE`* | | +| 1 |*`CUDNN_LOSS_NORMALIZATION_SOFTMAX`* | | +| struct |`cudnnFusedOpsConstParamStruct` | | +| struct* |`cudnnFusedOpsConstParamPack_t` | | +| struct |`cudnnFusedOpsVariantParamStruct` | | +| struct* |`cudnnFusedOpsVariantParamPack_t` | | +| struct |`cudnnFusedOpsPlanStruct` | | +| struct* |`cudnnFusedOpsPlan_t` | | +| enum |***`cudnnFusedOps_t`*** | | +| 0 |*`CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS`* | | +| 1 |*`CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD`* | | +| 2 |*`CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING`* | | +| 3 |*`CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE`* | | +| 4 |*`CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION`* | | +| 5 |*`CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK`* | | +| 6 |*`CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM`* | | +| enum |***`cudnnFusedOpsConstParamLabel_t`*** | | +| 0 |*`CUDNN_PARAM_XDESC`* | | +| 1 |*`CUDNN_PARAM_XDATA_PLACEHOLDER`* | | +| 2 |*`CUDNN_PARAM_BN_MODE`* | | +| 3 |*`CUDNN_PARAM_BN_EQSCALEBIAS_DESC`* | | +| 4 |*`CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER`* | | +| 5 |*`CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER`* | | +| 6 |*`CUDNN_PARAM_ACTIVATION_DESC`* | | +| 7 |*`CUDNN_PARAM_CONV_DESC`* | | +| 8 |*`CUDNN_PARAM_WDESC`* | | +| 9 |*`CUDNN_PARAM_WDATA_PLACEHOLDER`* | | +| 10 |*`CUDNN_PARAM_DWDESC`* | | +| 11 |*`CUDNN_PARAM_DWDATA_PLACEHOLDER`* | | +| 12 |*`CUDNN_PARAM_YDESC`* | | +| 13 |*`CUDNN_PARAM_YDATA_PLACEHOLDER`* | | +| 14 |*`CUDNN_PARAM_DYDESC`* | | +| 15 |*`CUDNN_PARAM_DYDATA_PLACEHOLDER`* | | +| 16 |*`CUDNN_PARAM_YSTATS_DESC`* | | +| 17 |*`CUDNN_PARAM_YSUM_PLACEHOLDER`* | | +| 18 |*`CUDNN_PARAM_YSQSUM_PLACEHOLDER`* | | +| 19 |*`CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC`* | | +| 20 |*`CUDNN_PARAM_BN_SCALE_PLACEHOLDER`* | | +| 21 |*`CUDNN_PARAM_BN_BIAS_PLACEHOLDER`* | | +| 22 |*`CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER`* | | +| 23 |*`CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER`* | | +| 24 |*`CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER`* | | +| 25 |*`CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER`* | | +| 26 |*`CUDNN_PARAM_ZDESC`* | | +| 27 |*`CUDNN_PARAM_ZDATA_PLACEHOLDER`* | | +| 28 |*`CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC`* | | +| 29 |*`CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER`* | | +| 30 |*`CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER`* | | +| 31 |*`CUDNN_PARAM_ACTIVATION_BITMASK_DESC`* | | +| 32 |*`CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER`* | | +| 33 |*`CUDNN_PARAM_DXDESC`* | | +| 34 |*`CUDNN_PARAM_DXDATA_PLACEHOLDER`* | | +| 35 |*`CUDNN_PARAM_DZDESC`* | | +| 36 |*`CUDNN_PARAM_DZDATA_PLACEHOLDER`* | | +| 37 |*`CUDNN_PARAM_BN_DSCALE_PLACEHOLDER`* | | +| 38 |*`CUDNN_PARAM_BN_DBIAS_PLACEHOLDER`* | | +| enum |***`cudnnFusedOpsPointerPlaceHolder_t`*** | | +| 0 |*`CUDNN_PTR_NULL`* | | +| 1 |*`CUDNN_PTR_ELEM_ALIGNED`* | | +| 2 |*`CUDNN_PTR_16B_ALIGNED`* | | +| enum |***`cudnnFusedOpsVariantParamLabel_t`*** | | +| 0 |*`CUDNN_PTR_XDATA`* | | +| 1 |*`CUDNN_PTR_BN_EQSCALE`* | | +| 2 |*`CUDNN_PTR_BN_EQBIAS`* | | +| 3 |*`CUDNN_PTR_WDATA`* | | +| 4 |*`CUDNN_PTR_DWDATA`* | | +| 5 |*`CUDNN_PTR_YDATA`* | | +| 6 |*`CUDNN_PTR_DYDATA`* | | +| 7 |*`CUDNN_PTR_YSUM`* | | +| 8 |*`CUDNN_PTR_YSQSUM`* | | +| 9 |*`CUDNN_PTR_WORKSPACE`* | | +| 10 |*`CUDNN_PTR_BN_SCALE`* | | +| 11 |*`CUDNN_PTR_BN_BIAS`* | | +| 12 |*`CUDNN_PTR_BN_SAVED_MEAN`* | | +| 13 |*`CUDNN_PTR_BN_SAVED_INVSTD`* | | +| 14 |*`CUDNN_PTR_BN_RUNNING_MEAN`* | | +| 15 |*`CUDNN_PTR_BN_RUNNING_VAR`* | | +| 16 |*`CUDNN_PTR_ZDATA`* | | +| 17 |*`CUDNN_PTR_BN_Z_EQSCALE`* | | +| 18 |*`CUDNN_PTR_BN_Z_EQBIAS`* | | +| 19 |*`CUDNN_PTR_ACTIVATION_BITMASK`* | | +| 20 |*`CUDNN_PTR_DXDATA`* | | +| 21 |*`CUDNN_PTR_DZDATA`* | | +| 22 |*`CUDNN_PTR_BN_DSCALE`* | | +| 23 |*`CUDNN_PTR_BN_DBIAS`* | | +| 100 |*`CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES`* | | +| 101 |*`CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT`* | | +| 102 |*`CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR`* | | +| 103 |*`CUDNN_SCALAR_DOUBLE_BN_EPSILON`* | | ## **2. CUDNN API functions** @@ -282,6 +375,7 @@ |`cudnnGetOpTensorDescriptor` |`hipdnnGetOpTensorDescriptor` | |`cudnnDestroyOpTensorDescriptor` |`hipdnnDestroyOpTensorDescriptor` | |`cudnnOpTensor` |`hipdnnOpTensor` | +|`cudnnGetFoldedConvBackwardDataDescriptors` | | |`cudnnCreateReduceTensorDescriptor` |`hipdnnCreateReduceTensorDescriptor` | |`cudnnSetReduceTensorDescriptor` |`hipdnnSetReduceTensorDescriptor` | |`cudnnGetReduceTensorDescriptor` |`hipdnnGetReduceTensorDescriptor` | @@ -296,12 +390,17 @@ |`cudnnGetFilter4dDescriptor` |`hipdnnGetFilter4dDescriptor` | |`cudnnSetFilterNdDescriptor` |`hipdnnSetFilterNdDescriptor` | |`cudnnGetFilterNdDescriptor` |`hipdnnGetFilterNdDescriptor` | +|`cudnnGetFilterSizeInBytes` | | +|`cudnnTransformFilter` | | |`cudnnDestroyFilterDescriptor` |`hipdnnDestroyFilterDescriptor` | +|`cudnnReorderFilterAndBias` | | |`cudnnCreateConvolutionDescriptor` |`hipdnnCreateConvolutionDescriptor` | |`cudnnSetConvolutionMathType` |`hipdnnSetConvolutionMathType` | |`cudnnGetConvolutionMathType` | | |`cudnnSetConvolutionGroupCount` |`hipdnnSetConvolutionGroupCount` | |`cudnnGetConvolutionGroupCount` | | +|`cudnnSetConvolutionReorderType` | | +|`cudnnGetConvolutionReorderType` | | |`cudnnSetConvolution2dDescriptor` |`hipdnnSetConvolution2dDescriptor` | |`cudnnGetConvolution2dDescriptor` |`hipdnnGetConvolution2dDescriptor` | |`cudnnGetConvolution2dForwardOutputDim` |`hipdnnGetConvolution2dForwardOutputDim` | @@ -424,7 +523,9 @@ |`cudnnGetRNNBiasMode` | | |`cudnnCreateCTCLossDescriptor` | | |`cudnnSetCTCLossDescriptor` | | +|`cudnnSetCTCLossDescriptorEx` | | |`cudnnGetCTCLossDescriptor` | | +|`cudnnGetCTCLossDescriptorEx` | | |`cudnnDestroyCTCLossDescriptor` | | |`cudnnCTCLoss` | | |`cudnnGetCTCLossWorkspaceSize` | | @@ -462,3 +563,15 @@ |`cudnnMultiHeadAttnForward` | | |`cudnnMultiHeadAttnBackwardData` | | |`cudnnMultiHeadAttnBackwardWeights` | | +|`cudnnCreateFusedOpsConstParamPack` | | +|`cudnnDestroyFusedOpsConstParamPack` | | +|`cudnnSetFusedOpsConstParamPackAttribute` | | +|`cudnnGetFusedOpsConstParamPackAttribute` | | +|`cudnnCreateFusedOpsVariantParamPack` | | +|`cudnnDestroyFusedOpsVariantParamPack` | | +|`cudnnSetFusedOpsVariantParamPackAttribute` | | +|`cudnnGetFusedOpsVariantParamPackAttribute` | | +|`cudnnCreateFusedOpsPlan` | | +|`cudnnDestroyFusedOpsPlan` | | +|`cudnnMakeFusedOpsPlan` | | +|`cudnnFusedOpsExecute` | | diff --git a/hipamd/hipify-clang/README.md b/hipamd/hipify-clang/README.md index 9ec0a7b9e7..7744085f16 100644 --- a/hipamd/hipify-clang/README.md +++ b/hipamd/hipify-clang/README.md @@ -142,9 +142,9 @@ To run it: * Path to cuDNN should be specified by the `CUDA_DNN_ROOT_DIR` option: - - Linux: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.0-v7.5.1.10` + - Linux: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.0-v7.6.0.64` - - Windows: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-9.0-windows10-x64-v7.5.1.10` + - Windows: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-9.0-windows10-x64-v7.6.0.64` 5. Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed. @@ -178,9 +178,9 @@ To run it: On Linux the following configurations are tested: -Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.5.1.10 +Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.6.0.64 -Ubuntu 16-18: LLVM 8.0.0, CUDA 8.0 - 10.0, cudnn-5.1.10 - cudnn-7.5.1.10 +Ubuntu 16-18: LLVM 8.0.0, CUDA 8.0 - 10.0, cudnn-5.1.10 - cudnn-7.6.0.64 Build system for the above configurations: @@ -195,7 +195,7 @@ cmake -DCMAKE_INSTALL_PREFIX=../dist \ -DCMAKE_PREFIX_PATH=/srv/git/LLVM/8.0.0/dist \ -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.0 \ - -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.0-v7.5.1.10 \ + -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.0-v7.6.0.64 \ -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/8.0.0/build/bin/llvm-lit \ .. ``` @@ -311,9 +311,9 @@ On Windows 10 the following configurations are tested: LLVM 5.0.0 - 5.0.2, CUDA 8.0, cudnn-5.1.10 - cudnn-7.1.4.18 -LLVM 6.0.0 - 6.0.1, CUDA 9.0, cudnn-7.0.5.15 - cudnn-7.5.1.10 +LLVM 6.0.0 - 6.0.1, CUDA 9.0, cudnn-7.0.5.15 - cudnn-7.6.0.64 -LLVM 7.0.0 - 8.0.0 (with patch*), CUDA 7.5 - 10.0, cudnn-7.0.5.15 - cudnn-7.5.1.10 +LLVM 7.0.0 - 8.0.0 (with patch*), CUDA 7.5 - 10.0, cudnn-7.0.5.15 - cudnn-7.6.0.64 Build system for the above configurations: @@ -330,7 +330,7 @@ cmake -DCMAKE_PREFIX_PATH=f:/LLVM/6.0.1/dist \ -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0" \ -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v9.0" \ - -DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-9.0-windows10-x64-v7.5.1.10 \ + -DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-9.0-windows10-x64-v7.6.0.64 \ -DLLVM_EXTERNAL_LIT=f:/LLVM/6.0.1/build/Release/bin/llvm-lit.py \ -Thost=x64 .. diff --git a/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp b/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp index a52c392b72..765ce78a26 100644 --- a/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp +++ b/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp @@ -61,6 +61,7 @@ const std::map CUDA_DNN_FUNCTION_MAP{ {"cudnnGetOpTensorDescriptor", {"hipdnnGetOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnDestroyOpTensorDescriptor", {"hipdnnDestroyOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnOpTensor", {"hipdnnOpTensor", "", CONV_LIB_FUNC, API_DNN}}, + {"cudnnGetFoldedConvBackwardDataDescriptors", {"hipdnnGetFoldedConvBackwardDataDescriptors", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, // cuDNN Reduce Tensor functions {"cudnnCreateReduceTensorDescriptor", {"hipdnnCreateReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, @@ -79,7 +80,10 @@ const std::map CUDA_DNN_FUNCTION_MAP{ {"cudnnGetFilter4dDescriptor", {"hipdnnGetFilter4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnSetFilterNdDescriptor", {"hipdnnSetFilterNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnGetFilterNdDescriptor", {"hipdnnGetFilterNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, + {"cudnnGetFilterSizeInBytes", {"hipdnnGetFilterSizeInBytes", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnTransformFilter", {"hipdnnTransformFilter", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", "", CONV_LIB_FUNC, API_DNN}}, + {"cudnnReorderFilterAndBias", {"hipdnnReorderFilterAndBias", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, // cuDNN Convolution functions {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", "", CONV_LIB_FUNC, API_DNN}}, @@ -87,6 +91,8 @@ const std::map CUDA_DNN_FUNCTION_MAP{ {"cudnnGetConvolutionMathType", {"hipdnnGetConvolutionMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetConvolutionGroupCount", {"hipdnnSetConvolutionGroupCount", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnGetConvolutionGroupCount", {"hipdnnGetConvolutionGroupCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetConvolutionReorderType", {"hipdnnSetConvolutionReorderType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetConvolutionReorderType", {"hipdnnGetConvolutionReorderType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnGetConvolution2dDescriptor", {"hipdnnGetConvolution2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, {"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", "", CONV_LIB_FUNC, API_DNN}}, @@ -235,7 +241,9 @@ const std::map CUDA_DNN_FUNCTION_MAP{ // cuDNN Connectionist Temporal Classification loss functions {"cudnnCreateCTCLossDescriptor", {"hipdnnCreateCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetCTCLossDescriptor", {"hipdnnSetCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetCTCLossDescriptorEx", {"hipdnnSetCTCLossDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetCTCLossDescriptor", {"hipdnnGetCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetCTCLossDescriptorEx", {"hipdnnGetCTCLossDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnDestroyCTCLossDescriptor", {"hipdnnDestroyCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnCTCLoss", {"hipdnnCTCLoss", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetCTCLossWorkspaceSize", {"hipdnnGetCTCLossWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, @@ -274,4 +282,18 @@ const std::map CUDA_DNN_FUNCTION_MAP{ {"cudnnMultiHeadAttnForward", {"hipdnnMultiHeadAttnForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnMultiHeadAttnBackwardData", {"hipdnnMultiHeadAttnBackwardData", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnMultiHeadAttnBackwardWeights", {"hipdnnMultiHeadAttnBackwardWeights", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + + // cuDNN Fuse functions + {"cudnnCreateFusedOpsConstParamPack", {"hipdnnCreateFusedOpsConstParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroyFusedOpsConstParamPack", {"hipdnnDestroyFusedOpsConstParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetFusedOpsConstParamPackAttribute", {"hipdnnSetFusedOpsConstParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetFusedOpsConstParamPackAttribute", {"hipdnnGetFusedOpsConstParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCreateFusedOpsVariantParamPack", {"hipdnnCreateFusedOpsVariantParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroyFusedOpsVariantParamPack", {"hipdnnDestroyFusedOpsVariantParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetFusedOpsVariantParamPackAttribute", {"hipdnnSetFusedOpsVariantParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetFusedOpsVariantParamPackAttribute", {"hipdnnGetFusedOpsVariantParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCreateFusedOpsPlan", {"hipdnnCreateFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroyFusedOpsPlan", {"hipdnnDestroyFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnMakeFusedOpsPlan", {"hipdnnMakeFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFusedOpsExecute", {"hipdnnFusedOpsExecute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, }; diff --git a/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp b/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp index 349e243082..a7d277e4c2 100644 --- a/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp +++ b/hipamd/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp @@ -240,6 +240,93 @@ const std::map CUDA_DNN_TYPE_NAME_MAP{ {"cudnnWgradMode_t", {"hipdnnWgradMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"CUDNN_WGRAD_MODE_ADD", {"HIPDNN_WGRAD_MODE_ADD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 {"CUDNN_WGRAD_MODE_SET", {"HIPDNN_WGRAD_MODE_SET", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"cudnnReorderType_t", {"hipdnnReorderType_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_DEFAULT_REORDER", {"HIPDNN_DEFAULT_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_NO_REORDER", {"HIPDNN_NO_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"cudnnLossNormalizationMode_t", {"hipdnnLossNormalizationMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_LOSS_NORMALIZATION_NONE", {"HIPDNN_LOSS_NORMALIZATION_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_LOSS_NORMALIZATION_SOFTMAX", {"HIPDNN_LOSS_NORMALIZATION_SOFTMAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"cudnnFusedOps_t", {"hipdnnFusedOps_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", {"HIPDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", {"HIPDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", {"HIPDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", {"HIPDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 + {"CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", {"HIPDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 + {"CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", {"HIPDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 + {"CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", {"HIPDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 + {"cudnnFusedOpsConstParamLabel_t", {"hipdnnFusedOpsConstParamLabel_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_PARAM_XDESC", {"HIPDNN_PARAM_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_PARAM_XDATA_PLACEHOLDER", {"HIPDNN_PARAM_XDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_PARAM_BN_MODE", {"HIPDNN_PARAM_BN_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"CUDNN_PARAM_BN_EQSCALEBIAS_DESC", {"HIPDNN_PARAM_BN_EQSCALEBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 + {"CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_EQSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 + {"CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_EQBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 + {"CUDNN_PARAM_ACTIVATION_DESC", {"HIPDNN_PARAM_ACTIVATION_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 + {"CUDNN_PARAM_CONV_DESC", {"HIPDNN_PARAM_CONV_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 + {"CUDNN_PARAM_WDESC", {"HIPDNN_PARAM_WDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_PARAM_WDATA_PLACEHOLDER", {"HIPDNN_PARAM_WDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 9 + {"CUDNN_PARAM_DWDESC", {"HIPDNN_PARAM_DWDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 10 + {"CUDNN_PARAM_DWDATA_PLACEHOLDER", {"HIPDNN_PARAM_DWDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 11 + {"CUDNN_PARAM_YDESC", {"HIPDNN_PARAM_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_PARAM_YDATA_PLACEHOLDER", {"HIPDNN_PARAM_YDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 + {"CUDNN_PARAM_DYDESC", {"HIPDNN_PARAM_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 14 + {"CUDNN_PARAM_DYDATA_PLACEHOLDER", {"HIPDNN_PARAM_DYDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 15 + {"CUDNN_PARAM_YSTATS_DESC", {"HIPDNN_PARAM_YSTATS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 + {"CUDNN_PARAM_YSUM_PLACEHOLDER", {"HIPDNN_PARAM_YSUM_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 17 + {"CUDNN_PARAM_YSQSUM_PLACEHOLDER", {"HIPDNN_PARAM_YSQSUM_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 18 + {"CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", {"HIPDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 19 + {"CUDNN_PARAM_BN_SCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_SCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 20 + {"CUDNN_PARAM_BN_BIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_BIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 21 + {"CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", {"HIPDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 22 + {"CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", {"HIPDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 23 + {"CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", {"HIPDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 24 + {"CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", {"HIPDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 25 + {"CUDNN_PARAM_ZDESC", {"HIPDNN_PARAM_ZDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 26 + {"CUDNN_PARAM_ZDATA_PLACEHOLDER", {"HIPDNN_PARAM_ZDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 27 + {"CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", {"HIPDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 28 + {"CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 29 + {"CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 30 + {"CUDNN_PARAM_ACTIVATION_BITMASK_DESC", {"HIPDNN_PARAM_ACTIVATION_BITMASK_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 31 + {"CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", {"HIPDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 32 + {"CUDNN_PARAM_DXDESC", {"HIPDNN_PARAM_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 33 + {"CUDNN_PARAM_DXDATA_PLACEHOLDER", {"HIPDNN_PARAM_DXDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 34 + {"CUDNN_PARAM_DZDESC", {"HIPDNN_PARAM_DZDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 35 + {"CUDNN_PARAM_DZDATA_PLACEHOLDER", {"HIPDNN_PARAM_DZDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 36 + {"CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_DSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 37 + {"CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_DBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 38 + {"cudnnFusedOpsPointerPlaceHolder_t", {"hipdnnActivationMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_PTR_NULL", {"HIPDNN_ACTIVATION_SIGMOID", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_PTR_ELEM_ALIGNED", {"HIPDNN_ACTIVATION_RELU", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_PTR_16B_ALIGNED", {"HIPDNN_ACTIVATION_TANH", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"cudnnFusedOpsVariantParamLabel_t", {"hipdnnFusedOpsVariantParamLabel_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_PTR_XDATA", {"HIPDNN_PTR_XDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_PTR_BN_EQSCALE", {"HIPDNN_PTR_BN_EQSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_PTR_BN_EQBIAS", {"HIPDNN_PTR_BN_EQBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"CUDNN_PTR_WDATA", {"HIPDNN_PTR_WDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 + {"CUDNN_PTR_DWDATA", {"HIPDNN_PTR_DWDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 + {"CUDNN_PTR_YDATA", {"HIPDNN_PTR_YDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 + {"CUDNN_PTR_DYDATA", {"HIPDNN_PTR_DYDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 + {"CUDNN_PTR_YSUM", {"HIPDNN_PTR_YSUM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 + {"CUDNN_PTR_YSQSUM", {"HIPDNN_PTR_YSQSUM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_PTR_WORKSPACE", {"HIPDNN_PTR_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 9 + {"CUDNN_PTR_BN_SCALE", {"HIPDNN_PTR_BN_SCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 10 + {"CUDNN_PTR_BN_BIAS", {"HIPDNN_PTR_BN_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 11 + {"CUDNN_PTR_BN_SAVED_MEAN", {"HIPDNN_PTR_BN_SAVED_MEAN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_PTR_BN_SAVED_INVSTD", {"HIPDNN_PTR_BN_SAVED_INVSTD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 + {"CUDNN_PTR_BN_RUNNING_MEAN", {"HIPDNN_PTR_BN_RUNNING_MEAN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 14 + {"CUDNN_PTR_BN_RUNNING_VAR", {"HIPDNN_PTR_BN_RUNNING_VAR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 15 + {"CUDNN_PTR_ZDATA", {"HIPDNN_PTR_ZDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 + {"CUDNN_PTR_BN_Z_EQSCALE", {"HIPDNN_PTR_BN_Z_EQSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 17 + {"CUDNN_PTR_BN_Z_EQBIAS", {"HIPDNN_PTR_BN_Z_EQBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 18 + {"CUDNN_PTR_ACTIVATION_BITMASK", {"HIPDNN_PTR_ACTIVATION_BITMASK", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 19 + {"CUDNN_PTR_DXDATA", {"HIPDNN_PTR_DXDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 20 + {"CUDNN_PTR_DZDATA", {"HIPDNN_PTR_DZDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 21 + {"CUDNN_PTR_BN_DSCALE", {"HIPDNN_PTR_BN_DSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 22 + {"CUDNN_PTR_BN_DBIAS", {"HIPDNN_PTR_BN_DBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 23 + {"CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", {"HIPDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 100 + {"CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", {"HIPDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 101 + {"CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", {"HIPDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 102 + {"CUDNN_SCALAR_DOUBLE_BN_EPSILON", {"HIPDNN_SCALAR_DOUBLE_BN_EPSILON", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 103 // cuDNN types {"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, @@ -288,5 +375,10 @@ const std::map CUDA_DNN_TYPE_NAME_MAP{ {"cudnnSeqDataDescriptor_t", {"hipdnnSeqDataDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnAttnStruct", {"hipdnnAttnStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnAttnDescriptor_t", {"hipdnnAttnDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - + {"cudnnFusedOpsConstParamStruct", {"hipdnnFusedOpsConstParamStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFusedOpsConstParamPack_t", {"hipdnnFusedOpsConstParamPack_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFusedOpsVariantParamStruct", {"hipdnnFusedOpsVariantParamStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFusedOpsVariantParamPack_t", {"hipdnnFusedOpsVariantParamPack_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFusedOpsPlanStruct", {"hipdnnFusedOpsPlanStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFusedOpsPlan_t", {"hipdnnFusedOpsPlan_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, }; From 34b1c6c5b21b631fbdb9f7f7caf6b759ef3fdb41 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 28 May 2019 00:10:34 -0400 Subject: [PATCH 15/24] Fix hipcc for hip-clang. (#1143) * Fix hipcc for hip-clang. If there is -g, do not add -O3 by default. If HIP_VDI_HOME is not set, set HIP_VDI_HOME based on hipcc directory for HIP/VDI runtime. For HIP/VDI runtime, set HIP_CLANG_PATH and DEVICE_LIB_PATH based on HIP_VDI_HOME only if they exist. This allows using HIP/VDI runtime with hip-clang installed at /opt/rocm/llvm and device lib installed at /opt/rocm/lib. * Fix HIP_VDI_HOME for hipcc called from /opt/rocm/bin --- hipamd/bin/hipcc | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index 30a4e34704..a0bce40760 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -107,24 +107,25 @@ $HIP_RUNTIME= $hipConfig{'HIP_RUNTIME'}; # If using VDI runtime, need to find HIP_VDI_HOME if ($HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { - $HIP_VDI_HOME = "/opt/rocm/hip" + my $hipcc_dir = dirname($0); + if (-e "$hipcc_dir/.hipVersion") { + $HIP_VDI_HOME = abs_path($hipcc_dir . "/.."); + } else { + $HIP_VDI_HOME = "/opt/rocm/hip"; + } } if (defined $HIP_VDI_HOME) { - my $bits = ""; - if (-d "$HIP_VDI_HOME/bin/x86_64") { - $bits = "/x86_64"; + if (!defined $HIP_CLANG_PATH and -e "$HIP_VDI_HOME/bin/clang") { + $HIP_CLANG_PATH = "$HIP_VDI_HOME/bin"; + $HIP_CLANG_INCLUDE_PATH = "$HIP_VDI_HOME/include/clang"; } - if (!defined $HIP_CLANG_PATH) { - $HIP_CLANG_PATH = "$HIP_VDI_HOME/bin" . $bits; + if (!defined $DEVICE_LIB_PATH and -e "$HIP_VDI_HOME/lib/bitcode") { + $DEVICE_LIB_PATH = "$HIP_VDI_HOME/lib/bitcode"; } - if (!defined $DEVICE_LIB_PATH) { - $DEVICE_LIB_PATH = "$HIP_VDI_HOME/lib" . $bits . "/bitcode"; - } - $HIP_CLANG_INCLUDE_PATH = "$HIP_VDI_HOME/include/clang"; $HIP_INCLUDE_PATH = "$HIP_VDI_HOME/include"; if (!defined $HIP_LIB_PATH) { - $HIP_LIB_PATH = "$HIP_VDI_HOME/lib" . $bits; + $HIP_LIB_PATH = "$HIP_VDI_HOME/lib"; } } @@ -169,7 +170,7 @@ if ($HIP_PLATFORM eq "clang") { $HIP_CLANG_VERSION=$1; if (! defined $HIP_CLANG_INCLUDE_PATH) { - $HIP_CLANG_INCLUDE_PATH = "$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION/include"; + $HIP_CLANG_INCLUDE_PATH = abs_path("$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION/include"); } if (! defined $HIP_INCLUDE_PATH) { $HIP_INCLUDE_PATH = "$HIP_PATH/include"; @@ -370,6 +371,7 @@ if($HIP_PLATFORM eq "nvcc"){ my $toolArgs = ""; # arguments to pass to the hcc or nvcc tool my $optArg = ""; # -O args +my $gArg = ""; # -g args foreach $arg (@ARGV) { @@ -465,6 +467,10 @@ foreach $arg (@ARGV) { $optArg = $arg; } + if($arg =~ m/^-g/) + { + $gArg = $arg; + } ## process linker response file for hip-clang ## extract object files from static library and pass them directly to @@ -790,7 +796,7 @@ if ($needHipHcc) { if ($HIP_PLATFORM eq "clang") { # Set default optimization level to -O3 for hip-clang. - if ($optArg eq "") { + if ($optArg eq "" and $gArg ne "-g") { $HIPCXXFLAGS .= " -O3"; $HIPLDFLAGS .= " -O3"; } From 4af81134ba51c61664bba1062e30ae61b12a4d3c Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 28 May 2019 16:58:55 +0530 Subject: [PATCH 16/24] Header changes for cooperative groups Change-Id: I5f3acca94275d74adc97adcb168aed9f74951189 --- .../include/hip/hcc_detail/hip_runtime_api.h | 93 +++++++++++++++++++ hipamd/include/hip/hip_runtime_api.h | 4 + 2 files changed, 97 insertions(+) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index d870963101..ba8d2a21c5 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -275,6 +275,15 @@ typedef struct dim3 { #endif } dim3; +typedef struct hipLaunchParams_t { + void* func; ///< Device function symbol + dim3 gridDim; ///< Grid dimentions + dim3 blockDim; ///< Block dimentions + void **args; ///< Arguments + size_t sharedMem; ///< Shared memory + hipStream_t stream; ///< Stream identifier +} hipLaunchParams; + // Doxygen end group GlobalDefs /** @} */ @@ -2842,6 +2851,62 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne unsigned int sharedMemBytes, hipStream_t stream, void** kernelParams, void** extra); +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed + * to kernelparams or extra, where thread blocks can cooperate and synchronize as they execute + * + * @param [in] f Kernel to launch. + * @param [in] gridDim Grid dimensions specified as multiple of blockDim. + * @param [in] blockDim Block dimensions specified in work-items + * @param [in] kernelParams A list of kernel arguments + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The + * kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th + * default stream is used with associated synchronization rules. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + */ +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream); + +/** + * @brief Launches kernels on multiple devices where thread blocks can cooperate and + * synchronize as they execute. + * + * @param [in] hipLaunchParams List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + */ +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynamicSMemSize Per - block dynamic shared memory usage intended, in bytes + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, const void* f, int blockSize, size_t dynamicSMemSize); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynamicSMemSize Per - block dynamic shared memory usage intended, in bytes + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + */ +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, const void* f, int blockSize, size_t dynamicSMemSize, unsigned int flags); + + // doxygen end Version Management /** * @} @@ -3170,6 +3235,34 @@ hipError_t hipBindTextureToMipmappedArray(const texture& tex, return hipSuccess; } +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, T f, int blockSize, size_t dynamicSMemSize) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor( + numBlocks, reinterpret_cast(f), blockSize, dynamicSMemSize); +} + +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynamicSMemSize, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynamicSMemSize, flags); +} + +template +inline hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, kernelParams, sharedMemBytes, stream); +} + +template +inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags = 0) { + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); +} + + /* * @brief Unbinds the textuer bound to @p tex * diff --git a/hipamd/include/hip/hip_runtime_api.h b/hipamd/include/hip/hip_runtime_api.h index e7ecede8c1..e3c10766e9 100644 --- a/hipamd/include/hip/hip_runtime_api.h +++ b/hipamd/include/hip/hip_runtime_api.h @@ -115,6 +115,8 @@ typedef struct hipDeviceProp_t { int canMapHostMemory; ///< Check whether HIP can map host memory int gcnArch; ///< AMD GCN Arch Value. Eg: 803, 701 int integrated; ///< APU vs dGPU + int cooperativeLaunch; ///< HIP device supports cooperative launch + int cooperativeMultiDeviceLaunch; ///< HIP device supports cooperative launch on multiple devices } hipDeviceProp_t; @@ -291,6 +293,8 @@ typedef enum hipDeviceAttribute_t { ///< Multiprocessor. hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. hipDeviceAttributeIntegrated, ///< iGPU + hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch + hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices } hipDeviceAttribute_t; enum hipComputeMode { From 332b19023da7c2b5a8151643f47d6d34d7396b28 Mon Sep 17 00:00:00 2001 From: Konstantin Pyzhov Date: Tue, 28 May 2019 09:38:17 -0400 Subject: [PATCH 17/24] Fixed setting HIP_CLANG_PATH on Windows. --- hipamd/bin/hipcc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index a0bce40760..a438f0afe5 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -116,7 +116,7 @@ if ($HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { } if (defined $HIP_VDI_HOME) { - if (!defined $HIP_CLANG_PATH and -e "$HIP_VDI_HOME/bin/clang") { + if (!defined $HIP_CLANG_PATH and (-e "$HIP_VDI_HOME/bin/clang" or -e "$HIP_VDI_HOME/bin/clang.exe")) { $HIP_CLANG_PATH = "$HIP_VDI_HOME/bin"; $HIP_CLANG_INCLUDE_PATH = "$HIP_VDI_HOME/include/clang"; } From 6aa704e7b97d02f623a15230fb0eea62bb4895a3 Mon Sep 17 00:00:00 2001 From: Zuhaib Khan Date: Tue, 28 May 2019 16:57:51 -0400 Subject: [PATCH 18/24] Structured hipFloatComplex as typedef of float2, and hipDoubleComplex as typedef of double2. --- hipamd/include/hip/hcc_detail/hip_complex.h | 220 ++++++++------------ 1 file changed, 92 insertions(+), 128 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/hip_complex.h b/hipamd/include/hip/hcc_detail/hip_complex.h index 128e2d670b..75930c469e 100644 --- a/hipamd/include/hip/hcc_detail/hip_complex.h +++ b/hipamd/include/hip/hcc_detail/hip_complex.h @@ -120,51 +120,102 @@ THE SOFTWARE. ret.y = lhs.y * rhs; \ return ret; \ } -#define MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(ComplexT, T) \ - explicit __device__ __host__ ComplexT(T val) : x(val), y(val) {} \ - __device__ __host__ ComplexT(T val1, T val2) : x(val1), y(val2) {} #endif -struct hipFloatComplex { -#ifdef __cplusplus - public: - typedef float value_type; - __device__ __host__ hipFloatComplex() : x(0.0f), y(0.0f) {} - explicit __device__ __host__ hipFloatComplex(float x) : x(x), y(0.0f) {} - __device__ __host__ hipFloatComplex(float x, float y) : x(x), y(y) {} - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, double) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipFloatComplex, signed long long) -#endif - float x, y; -} __attribute__((aligned(8))); +typedef float2 hipFloatComplex; + +__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } + +__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } + +__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { + hipFloatComplex z; + z.x = a; + z.y = b; + return z; +} + +__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { + hipFloatComplex ret; + ret.x = z.x; + ret.y = -z.y; + return ret; +} + +__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { + return z.x * z.x + z.y * z.y; +} + +__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x + q.x, p.y + q.y); +} + +__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x - q.x, p.y - q.y); +} + +__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +} + +__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { + float sqabs = hipCsqabsf(q); + hipFloatComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; +} + +__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } + + +typedef double2 hipDoubleComplex; + +__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } + +__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } + +__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { + hipDoubleComplex z; + z.x = a; + z.y = b; + return z; +} + +__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { + hipDoubleComplex ret; + ret.x = z.x; + ret.y = z.y; + return ret; +} + +__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { + return z.x * z.x + z.y * z.y; +} + +__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x + q.x, p.y + q.y); +} + +__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x - q.x, p.y - q.y); +} + +__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +} + +__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { + double sqabs = hipCsqabs(q); + hipDoubleComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; +} + +__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrtf(hipCsqabs(z)); } -struct hipDoubleComplex { -#ifdef __cplusplus - public: - typedef double value_type; - __device__ __host__ hipDoubleComplex() : x(0.0f), y(0.0f) {} - explicit __device__ __host__ hipDoubleComplex(double x) : x(x), y(0.0f) {} - __device__ __host__ hipDoubleComplex(double x, double y) : x(x), y(y) {} - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed short) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed int) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, float) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, unsigned long long) - MAKE_COMPONENT_CONSTRUCTOR_TWO_COMPONENT(hipDoubleComplex, signed long long) -#endif - double x, y; -} __attribute__((aligned(16))); #if __cplusplus @@ -214,93 +265,6 @@ COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long) #endif -__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } - -__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } - -__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { - hipFloatComplex z; - z.x = a; - z.y = b; - return z; -} - -__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { - hipFloatComplex ret; - ret.x = z.x; - ret.y = -z.y; - return ret; -} - -__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { - return z.x * z.x + z.y * z.y; -} - -__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { - return make_hipFloatComplex(p.x + q.x, p.y + q.y); -} - -__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { - return make_hipFloatComplex(p.x - q.x, p.y - q.y); -} - -__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { - return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); -} - -__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { - float sqabs = hipCsqabsf(q); - hipFloatComplex ret; - ret.x = (p.x * q.x + p.y * q.y) / sqabs; - ret.y = (p.y * q.x - p.x * q.y) / sqabs; - return ret; -} - -__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } - -__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } - -__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } - -__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { - hipDoubleComplex z; - z.x = a; - z.y = b; - return z; -} - -__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { - hipDoubleComplex ret; - ret.x = z.x; - ret.y = z.y; - return ret; -} - -__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { - return z.x * z.x + z.y * z.y; -} - -__device__ __host__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { - return make_hipDoubleComplex(p.x + q.x, p.y + q.y); -} - -__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { - return make_hipDoubleComplex(p.x - q.x, p.y - q.y); -} - -__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { - return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); -} - -__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { - double sqabs = hipCsqabs(q); - hipDoubleComplex ret; - ret.x = (p.x * q.x + p.y * q.y) / sqabs; - ret.y = (p.y * q.x - p.x * q.y) / sqabs; - return ret; -} - -__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return sqrtf(hipCsqabs(z)); } typedef hipFloatComplex hipComplex; From b2ffd6afc224bdbf7445cf97895e2750354a9390 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Wed, 29 May 2019 03:04:48 -0400 Subject: [PATCH 19/24] fix compilation error when host compiler is clang (#1147) * fix compilation error when host compiler is clang * use a macro specifically for hcc && hip-clang --- hipamd/include/hip/hcc_detail/hip_fp16.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/include/hip/hcc_detail/hip_fp16.h b/hipamd/include/hip/hcc_detail/hip_fp16.h index 93ede207c2..74424a9f8b 100644 --- a/hipamd/include/hip/hcc_detail/hip_fp16.h +++ b/hipamd/include/hip/hcc_detail/hip_fp16.h @@ -34,7 +34,7 @@ THE SOFTWARE. #include #endif -#if defined(__clang__) && (__clang_major__ > 5) +#if __HCC_OR_HIP_CLANG__ typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2))); struct __half_raw { From d8e94fd5b5366bdafe6d22762b10bcf352dcf43f Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Thu, 30 May 2019 18:04:05 -0400 Subject: [PATCH 20/24] Header change for new hip API hipExtLaunchMultiKernelMultiDevice --- .../include/hip/hcc_detail/hip_runtime_api.h | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index ba8d2a21c5..1b332cdb85 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -2906,6 +2906,21 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, const void* f, int blockSize, size_t dynamicSMemSize, unsigned int flags); +/** + * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched + * on respective streams before enqueuing any other work on the specified streams from any other threads + * + * + * @param [in] hipLaunchParams List of launch parameters, one per device. + * @param [in] numDevices Size of the launchParamsList array. + * @param [in] flags Flags to control launch behavior. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + */ +hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags); + + // doxygen end Version Management /** @@ -3262,6 +3277,12 @@ inline hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchP return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags); } +template +inline hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, + unsigned int numDevices, unsigned int flags = 0) { + return hipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags); +} + /* * @brief Unbinds the textuer bound to @p tex From 165d73de9a8c2c8a06f13d692c242895cf2e2f90 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 31 May 2019 16:39:33 +0530 Subject: [PATCH 21/24] [dtests] Temporarily disable hiprtc tests Change-Id: I87c0c01837e7b59b11d99fb94d679a765f914da5 --- hipamd/tests/src/hiprtc/hiprtcGetLoweredName.cpp | 2 +- hipamd/tests/src/hiprtc/hiprtcGetTypeName.cpp | 2 +- hipamd/tests/src/hiprtc/saxpy.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hipamd/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/hipamd/tests/src/hiprtc/hiprtcGetLoweredName.cpp index e3fa057a81..d9e6d71f93 100644 --- a/hipamd/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ b/hipamd/tests/src/hiprtc/hiprtcGetLoweredName.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t + * TEST: %t EXCLUDE_HIP_PLATFORM hcc * HIT_END */ #include diff --git a/hipamd/tests/src/hiprtc/hiprtcGetTypeName.cpp b/hipamd/tests/src/hiprtc/hiprtcGetTypeName.cpp index b0348408f3..7f49ea984b 100644 --- a/hipamd/tests/src/hiprtc/hiprtcGetTypeName.cpp +++ b/hipamd/tests/src/hiprtc/hiprtcGetTypeName.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t + * TEST: %t EXCLUDE_HIP_PLATFORM hcc * HIT_END */ diff --git a/hipamd/tests/src/hiprtc/saxpy.cpp b/hipamd/tests/src/hiprtc/saxpy.cpp index 5f9dc7a125..437420266d 100644 --- a/hipamd/tests/src/hiprtc/saxpy.cpp +++ b/hipamd/tests/src/hiprtc/saxpy.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc - * TEST: %t + * TEST: %t EXCLUDE_HIP_PLATFORM hcc * HIT_END */ From 7c20081f8d201bf8bd88dbc12c27af6306118a36 Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Fri, 31 May 2019 12:07:58 -0400 Subject: [PATCH 22/24] Add device_builtin_texture_type attribute to texture type for hip-clang This is required to support texture type for hip-clang. --- hipamd/include/hip/hcc_detail/hip_texture_types.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hipamd/include/hip/hcc_detail/hip_texture_types.h b/hipamd/include/hip/hcc_detail/hip_texture_types.h index 0a68b507e8..b229f4e696 100644 --- a/hipamd/include/hip/hcc_detail/hip_texture_types.h +++ b/hipamd/include/hip/hcc_detail/hip_texture_types.h @@ -45,10 +45,15 @@ THE SOFTWARE. * * * * *******************************************************************************/ +#if __HIP__ +#define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type)) +#else +#define __HIP_TEXTURE_ATTRIB +#endif template -struct texture : public textureReference { +struct __HIP_TEXTURE_ATTRIB texture : public textureReference { texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint, enum hipTextureAddressMode aMode = hipAddressModeClamp) { normalized = norm; From 7a2e3b6a1cba775b803c3a4934c0aa0c58688c19 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Fri, 31 May 2019 22:30:24 +0530 Subject: [PATCH 23/24] Fix wrong grid dim shown in trace --- hipamd/src/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index e8a8801e98..0b98af5a82 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -189,7 +189,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel. hStream = ihipPreLaunchKernel( - hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ), + hStream, dim3(globalWorkSizeX/localWorkSizeX, globalWorkSizeY/localWorkSizeY, globalWorkSizeZ/localWorkSizeZ), dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str()); From 154765df88d4027e76b0cdcbf2166b4130c5737d Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Fri, 31 May 2019 23:58:59 -0400 Subject: [PATCH 24/24] Fix default HIP_VDI_HOME There is soft link /opt/rocm/bin/.hipVersion, therefore when hipcc is executed as /opt/rocm/bin/hipcc, it will set HIP_VDI_HOME to /opt/rocm, which is incorrect. Check ../lib/bitcode instead to identify HIP_VDI_HOME. --- hipamd/bin/hipcc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/bin/hipcc b/hipamd/bin/hipcc index a438f0afe5..58c8fe45c9 100755 --- a/hipamd/bin/hipcc +++ b/hipamd/bin/hipcc @@ -108,7 +108,7 @@ $HIP_RUNTIME= $hipConfig{'HIP_RUNTIME'}; # If using VDI runtime, need to find HIP_VDI_HOME if ($HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { my $hipcc_dir = dirname($0); - if (-e "$hipcc_dir/.hipVersion") { + if (-e "$hipcc_dir/../lib/bitcode") { $HIP_VDI_HOME = abs_path($hipcc_dir . "/.."); } else { $HIP_VDI_HOME = "/opt/rocm/hip";