From 516d92f4d776223165a71b4ad885a210170ab40c Mon Sep 17 00:00:00 2001 From: Paul Date: Tue, 24 Apr 2018 14:04:24 -0500 Subject: [PATCH 01/49] Add host and device targets --- CMakeLists.txt | 7 +++++-- hip-config.cmake.in | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9a3451222..62676eb7cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -215,7 +215,10 @@ if(HIP_PLATFORM STREQUAL "hcc") foreach(TARGET hip_hcc hip_hcc_static hip_device) target_include_directories(${TARGET} SYSTEM INTERFACE $/include>;${HSA_PATH}/include) endforeach() - target_link_libraries(hip_hcc INTERFACE hcc::hccrt;hcc::hc_am) + add_library(host INTERFACE) + target_link_libraries(host INTERFACE hip_hcc) + add_library(device INTERFACE) + target_link_libraries(device INTERFACE host hip_device hcc::hccrt hcc::hc_am) # Generate .hipInfo file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) @@ -265,7 +268,7 @@ set(BIN_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/bin) set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) if(HIP_PLATFORM STREQUAL "hcc") - install(TARGETS hip_hcc_static hip_hcc hip_device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) + install(TARGETS hip_hcc_static hip_hcc hip_device host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) include(CMakePackageConfigHelpers) diff --git a/hip-config.cmake.in b/hip-config.cmake.in index 7e4468b94a..efcdf708bb 100644 --- a/hip-config.cmake.in +++ b/hip-config.cmake.in @@ -51,7 +51,7 @@ set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") find_dependency(hcc) include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) -set( hip_LIBRARIES hip::hip_hcc) +set( hip_LIBRARIES hip::host hip::device) set( hip_LIBRARY ${hip_LIBRARIES}) set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) From da302c3e9304c4cc5033ebffeb4e753cbc834697 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 7 May 2018 10:24:30 +0530 Subject: [PATCH 02/49] Added hipMemset3D --- include/hip/hcc_detail/hip_runtime_api.h | 21 +++++ include/hip/nvcc_detail/hip_runtime_api.h | 8 ++ src/hip_memory.cpp | 36 ++++++++ tests/src/runtimeApi/memory/hipMemset3D.cpp | 98 +++++++++++++++++++++ 4 files changed, 163 insertions(+) create mode 100644 tests/src/runtimeApi/memory/hipMemset3D.cpp diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 8becab3c9c..ada0fac19a 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -1450,6 +1450,27 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0)); +/** + * @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr + * @param[in] value - constant value to be set + * @param[in] extent + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ); + +/** + * @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value. + * + * @param[in] pitchedDevPtr + * @param[in] value - constant value to be set + * @param[in] extent + * @param[in] stream + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream __dparm(0)); + /** * @brief Query memory info. * Return snapshot of free memory, and total allocatable memory on the device. diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 6f222648a2..f8e4e39136 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -649,6 +649,14 @@ inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, si return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream)); } +inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){ + return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent)); +} + +inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){ + return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream)); +} + inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) { struct cudaDeviceProp cdprop; cudaError_t cerror; diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 48a8da81ca..d9b0c7acab 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1698,6 +1698,42 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes return ihipLogStatus(e); } +hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ) +{ + HIP_INIT_SPECIAL_API((TRACE_MCMD), &pitchedDevPtr, value, &extent); + hipError_t e = hipSuccess; + + hipStream_t stream = hipStreamNull; + // TODO - call an ihip memset so HIP_TRACE is correct. + stream = ihipSyncAndResolveStream(stream); + if (stream) { + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; + e = ihipMemset(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar); + stream->locked_wait(); + } else { + e = hipErrorInvalidValue; + } + + return ihipLogStatus(e); +} + +hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream ) +{ + HIP_INIT_SPECIAL_API((TRACE_MCMD), &pitchedDevPtr, value, &extent); + hipError_t e = hipSuccess; + + // TODO - call an ihip memset so HIP_TRACE is correct. + stream = ihipSyncAndResolveStream(stream); + if (stream) { + size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; + e = ihipMemset(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar); + } else { + e = hipErrorInvalidValue; + } + + return ihipLogStatus(e); +} + hipError_t hipMemGetInfo(size_t* free, size_t* total) { HIP_INIT_API(free, total); diff --git a/tests/src/runtimeApi/memory/hipMemset3D.cpp b/tests/src/runtimeApi/memory/hipMemset3D.cpp new file mode 100644 index 0000000000..40f2f3e67f --- /dev/null +++ b/tests/src/runtimeApi/memory/hipMemset3D.cpp @@ -0,0 +1,98 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Simple test for memset. +// Also serves as a template for other tests. + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * //Small copy + * RUN: %t -N 10 --memsetval 0x42 + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +bool testhipMemset3D(int memsetval,int p_gpuDevice) +{ + size_t numH = 256; + size_t numW = 256; + size_t depth = 1; + size_t pitch_A; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH * depth; + size_t elements = numW* numH* depth; + + + printf ("testhipMemset3D memsetval=%2x device=%d\n", memsetval, p_gpuDevice); + char *A_d; + char *A_h; + bool testResult = true; + hipExtent extent = make_hipExtent(width, numH, depth); + hipPitchedPtr devPitchedPtr; + + HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); + A_h = (char*)malloc(sizeElements); + HIPASSERT(A_h != NULL); + for (size_t i=0; i Date: Tue, 8 May 2018 13:42:00 -0400 Subject: [PATCH 03/49] add intrinsics mbcnt_lo, mbcnt_hi, lane_id --- include/hip/hcc_detail/device_functions.h | 5 + tests/src/deviceLib/hip_mbcnt.cpp | 128 ++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 tests/src/deviceLib/hip_mbcnt.cpp diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 28d874b27a..804f34f4f6 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -345,6 +345,11 @@ __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, uns return __hip_hc_ir_usad_int(x, y, z); } +extern __device__ __attribute__((const)) unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo"); +extern __device__ __attribute__((const)) unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi"); + +__device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } + /* Rounding modes are not yet supported in HIP */ diff --git a/tests/src/deviceLib/hip_mbcnt.cpp b/tests/src/deviceLib/hip_mbcnt.cpp new file mode 100644 index 0000000000..0dd7169f51 --- /dev/null +++ b/tests/src/deviceLib/hip_mbcnt.cpp @@ -0,0 +1,128 @@ +/* +Copyright (c) 2015-2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include +#include +#include +#include +#include +#include +#include + +#define HIP_ASSERT(x) (assert((x) == hipSuccess)) + +__global__ void HIP_kernel(hipLaunchParm lp, unsigned int* mbcnt_lo, unsigned int* mbcnt_hi, unsigned int* lane_id) { + int x = blockDim.x * blockIdx.x + threadIdx.x; + mbcnt_lo[x] = __mbcnt_lo(0xFFFFFFFF, 0); + mbcnt_hi[x] = __mbcnt_hi(0xFFFFFFFF, 0); + lane_id[x] = __lane_id(); +} + +using namespace std; + +int main() { + + unsigned int* device_mbcnt_lo; + unsigned int* device_mbcnt_hi; + unsigned int* device_lane_id; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + cout << " System minor " << devProp.minor << endl; + cout << " System major " << devProp.major << endl; + cout << " agent prop name " << devProp.name << endl; + + cout << "hip Device prop succeeded " << endl; + + constexpr unsigned int wave_size = 64; + constexpr unsigned int num_waves_per_block = 2; + constexpr unsigned int num_threads_per_block = wave_size * num_waves_per_block; + constexpr unsigned int num_blocks = 2; + constexpr unsigned int num_threads = num_threads_per_block * num_blocks; + constexpr size_t buffer_size = num_threads * sizeof(unsigned int); + + HIP_ASSERT(hipMalloc((void**)&device_mbcnt_lo, buffer_size)); + HIP_ASSERT(hipMalloc((void**)&device_mbcnt_hi, buffer_size)); + HIP_ASSERT(hipMalloc((void**)&device_lane_id, buffer_size)); + + hipLaunchKernel(HIP_kernel, dim3(num_blocks), + dim3(num_threads_per_block), 0, 0, device_mbcnt_lo, device_mbcnt_hi, device_lane_id); + + unsigned int* host_mbcnt_lo = (unsigned int*) malloc(buffer_size); + unsigned int* host_mbcnt_hi = (unsigned int*) malloc(buffer_size); + unsigned int* host_lane_id = (unsigned int*) malloc(buffer_size); + + HIP_ASSERT(hipMemcpy(host_mbcnt_lo, device_mbcnt_lo, buffer_size, hipMemcpyDeviceToHost)); + HIP_ASSERT(hipMemcpy(host_mbcnt_hi, device_mbcnt_hi, buffer_size, hipMemcpyDeviceToHost)); + HIP_ASSERT(hipMemcpy(host_lane_id, device_lane_id, buffer_size, hipMemcpyDeviceToHost)); + + // verify the results + int mbcnt_lo_errors = 0; + int mbcnt_hi_errors = 0; + int lane_id_errors = 0; + for (unsigned int i = 0; i < num_threads; i++) { + unsigned int this_lane_id = i % wave_size; + unsigned int this_mbcnt_lo = this_lane_id >= 32 ? 32 : this_lane_id; + unsigned int this_mbcnt_hi = this_lane_id < 32 ? 0 : (this_lane_id - 22); + + if (host_mbcnt_lo[i] != this_mbcnt_lo) + mbcnt_lo_errors++; + + if (host_mbcnt_hi[i] != this_mbcnt_hi) + mbcnt_hi_errors++; + + if (host_lane_id[i] != this_lane_id) + lane_id_errors++; + } + + if (mbcnt_lo_errors == 0) + cout << "__mbcnt_lo() PASSED!" << endl; + else + cout << "__mbcnt_lo() FAILED!" << endl; + + + if (mbcnt_hi_errors == 0) + cout << "__mbcnt_hi() PASSED!" << endl; + else + cout << "__mbcnt_hi() FAILED!" << endl; + + if (lane_id_errors == 0) + cout << "__lane_id() PASSED!" << endl; + else + cout << "__lane_id() FAILED!" << endl; + + HIP_ASSERT(hipFree(device_mbcnt_lo)); + HIP_ASSERT(hipFree(device_mbcnt_hi)); + HIP_ASSERT(hipFree(device_lane_id)); + + free(host_mbcnt_lo); + free(host_mbcnt_hi); + free(host_lane_id); + + return mbcnt_lo_errors + mbcnt_hi_errors + lane_id_errors; +} From 7672b44c790eed7cefdf8d93a2caaeb86a7ac190 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 8 May 2018 15:36:01 -0400 Subject: [PATCH 04/49] Add __assert_fail, __device_trap and hipErrorAssert for clang --- include/hip/hcc_detail/hip_runtime.h | 16 ++++++++++++++++ include/hip/hip_runtime_api.h | 2 ++ 2 files changed, 18 insertions(+) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 92f06e9174..e9551d4f9a 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -612,6 +612,22 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; #define hipGridDim_y gridDim.y #define hipGridDim_z gridDim.z +#pragma push_macro("__DEVICE__") +#define __DEVICE__ extern "C" __device__ inline __attribute__((always_inline)) \ + __attribute__((weak)) + +__DEVICE__ void __device_trap() __asm("llvm.trap"); + +__DEVICE__ void __assert_fail(const char * __assertion, + const char *__file, + unsigned int __line, + const char *__function) +{ + // Ignore all the args for now. + __device_trap(); +} +#pragma push_macro("__DEVICE__") + #endif #endif // HIP_HCC_DETAIL_RUNTIME_H diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index bfde2e942d..2b36f3e140 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -243,6 +243,8 @@ typedef enum __HIP_NODISCARD hipError_t { 1062, ///< Produced when trying to unlock a non-page-locked memory. hipErrorMapBufferObjectFailed = 1071, ///< Produced when the IPC memory attach failed from ROCr. + hipErrorAssert = + 1081, ///< Produced when the kernel calls assert. hipErrorTbd ///< Marker that more error codes are needed. } hipError_t; From 19f3ed6f62ebc891f53863f08d829e60dd453169 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 8 May 2018 16:38:50 -0400 Subject: [PATCH 05/49] Fix warning about inlined function is not defined --- include/hip/hcc_detail/hip_runtime.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index e9551d4f9a..e1b334aec3 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -613,15 +613,15 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; #define hipGridDim_z gridDim.z #pragma push_macro("__DEVICE__") -#define __DEVICE__ extern "C" __device__ inline __attribute__((always_inline)) \ +#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \ __attribute__((weak)) __DEVICE__ void __device_trap() __asm("llvm.trap"); -__DEVICE__ void __assert_fail(const char * __assertion, - const char *__file, - unsigned int __line, - const char *__function) +__DEVICE__ void inline __assert_fail(const char * __assertion, + const char *__file, + unsigned int __line, + const char *__function) { // Ignore all the args for now. __device_trap(); From dffe1802be5cf9f05e13854207d407d76e8661c0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 10 May 2018 17:36:51 +0300 Subject: [PATCH 06/49] [HIPIFY][DNN] support of cuDNN 7.1.3 - continuation - not finished yet. - based on https://github.com/ROCmSoftwarePlatform/hipDNN. - testing on https://github.com/baidu-research/DeepBench. --- hipify-clang/src/CUDA2HipMap.cpp | 173 ++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 1 deletion(-) diff --git a/hipify-clang/src/CUDA2HipMap.cpp b/hipify-clang/src/CUDA2HipMap.cpp index 6aa9e733f2..fd8446e689 100644 --- a/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipify-clang/src/CUDA2HipMap.cpp @@ -2920,6 +2920,10 @@ const std::map CUDA_IDENTIFIER_MAP{ // unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence ///////////////////////////// cuDNN ///////////////////////////// + // defines + {"CUDNN_VERSION", {"HIPDNN_VERSION", CONV_NUMERIC_LITERAL, API_DNN}}, // 7000 + {"CUDNN_DIM_MAX", {"HIPDNN_DIM_MAX", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 + {"cudnnContext", {"hipdnnContext", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnHandle_t", {"hipdnnHandle_t", CONV_TYPE, API_DNN}}, {"cudnnStatus_t", {"hipdnnStatus_t", CONV_TYPE, API_DNN}}, @@ -2957,6 +2961,11 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_DATA_UINT8", {"HIPDNN_DATA_UINT8", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 {"CUDNN_DATA_UINT8x4", {"HIPDNN_DATA_UINT8x4", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 + {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_ERRQUERY_BLOCKING", {"HIPDNN_ERRQUERY_BLOCKING", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"cudnnConvolutionFwdAlgo_t", {"hipdnnConvolutionFwdAlgo_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 @@ -2973,26 +2982,188 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"cudnnDeterminism_t", {"hipdnnDeterminism_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_NON_DETERMINISTIC", {"HIPDNN_NON_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_DETERMINISTIC", {"HIPDNN_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"cudnnFilterDescriptor_t", {"hipdnnFilterDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnDropoutDescriptor_t", {"hipdnnDropoutDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnConvolutionFwdAlgoPerf_t", {"hipdnnConvolutionFwdAlgoPerf_t", CONV_TYPE, API_DNN}}, + {"cudnnConvolutionBwdFilterAlgoPerf_t", {"hipdnnConvolutionBwdFilterAlgoPerf_t", CONV_TYPE, API_DNN}}, + {"cudnnRNNDescriptor_t", {"hipdnnRNNDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnPersistentRNNPlan_t", {"hipdnnPersistentRNNPlan_t", CONV_TYPE, API_DNN}}, + {"cudnnTensorStruct", {"hipdnnTensorStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnConvolutionStruct", {"hipdnnConvolutionStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnPoolingStruct", {"hipdnnPoolingStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnPoolingDescriptor_t", {"hipdnnPoolingDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnFilterStruct", {"hipdnnFilterStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnLRNDescriptor_t", {"hipdnnLRNDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnLRNStruct", {"hipdnnLRNStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnActivationDescriptor_t", {"hipdnnActivationDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnActivationStruct", {"hipdnnActivationStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSpatialTransformerDescriptor_t", {"hipdnnSpatialTransformerDescriptor_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSpatialTransformerStruct", {"hipdnnSpatialTransformerStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnOpTensorDescriptor_t", {"hipdnnOpTensorDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnOpTensorStruct", {"hipdnnOpTensorStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnReduceTensorDescriptor_t", {"hipdnnReduceTensorDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnReduceTensorStruct", {"hipdnnReduceTensorStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCTCLossDescriptor_t", {"hipdnnCTCLossDescriptor_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCTCLossStruct", {"hipdnnCTCLossStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + + {"cudnnRNNInputMode_t", {"hipdnnRNNInputMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_LINEAR_INPUT", {"HIPDNN_LINEAR_INPUT", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_SKIP_INPUT", {"HIPDNN_SKIP_INPUT", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + + {"cudnnDirectionMode_t", {"hipdnnDirectionMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_UNIDIRECTIONAL", {"HIPDNN_UNIDIRECTIONAL", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_BIDIRECTIONAL", {"HIPDNN_BIDIRECTIONAL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + + {"cudnnMathType_t", {"hipdnnMathType_t", CONV_TYPE, API_DNN}}, + {"CUDNN_DEFAULT_MATH", {"HIPDNN_DEFAULT_MATH", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_TENSOR_OP_MATH", {"HIPDNN_TENSOR_OP_MATH", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + + {"cudnnNanPropagation_t", {"hipdnnNanPropagation_t", CONV_TYPE, API_DNN}}, + {"CUDNN_NOT_PROPAGATE_NAN", {"HIPDNN_NOT_PROPAGATE_NAN", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_PROPAGATE_NAN", {"HIPDNN_PROPAGATE_NAN", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + + {"cudnnConvolutionBwdDataAlgo_t", {"hipdnnConvolutionBwdDataAlgo_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 + + {"cudnnConvolutionBwdFilterAlgo_t", {"hipdnnConvolutionBwdFilterAlgo_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_3", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 + {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 + + {"cudnnConvolutionBwdFilterPreference_t", {"hipdnnConvolutionBwdFilterPreference_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",{"HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + + {"cudnnRNNAlgo_t", {"hipdnnRNNAlgo_t", CONV_TYPE, API_DNN}}, + {"CUDNN_RNN_ALGO_STANDARD", {"HIPDNN_RNN_ALGO_STANDARD", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_RNN_ALGO_PERSIST_STATIC", {"HIPDNN_RNN_ALGO_PERSIST_STATIC", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_RNN_ALGO_PERSIST_DYNAMIC", {"HIPDNN_RNN_ALGO_PERSIST_DYNAMIC", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_RNN_ALGO_COUNT", {"HIPDNN_RNN_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 + + {"cudnnRNNMode_t", {"hipdnnRNNMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_RNN_RELU", {"HIPDNN_RNN_RELU", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_RNN_TANH", {"HIPDNN_RNN_TANH", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_LSTM", {"HIPDNN_LSTM", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_GRU", {"HIPDNN_GRU", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + + {"cudnnOpTensorOp_t", {"hipdnnOpTensorOp_t", CONV_TYPE, API_DNN}}, + {"CUDNN_OP_TENSOR_ADD", {"HIPDNN_OP_TENSOR_ADD", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_OP_TENSOR_MUL", {"HIPDNN_OP_TENSOR_MUL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_OP_TENSOR_MIN", {"HIPDNN_OP_TENSOR_MIN", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_OP_TENSOR_MAX", {"HIPDNN_OP_TENSOR_MAX", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_OP_TENSOR_SQRT", {"HIPDNN_OP_TENSOR_SQRT", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_OP_TENSOR_NOT", {"HIPDNN_OP_TENSOR_NOT", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 + + {"cudnnReduceTensorOp_t", {"hipdnnReduceTensorOp_t", CONV_TYPE, API_DNN}}, + {"CUDNN_REDUCE_TENSOR_ADD", {"HIPDNN_REDUCE_TENSOR_ADD", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_REDUCE_TENSOR_MUL", {"HIPDNN_REDUCE_TENSOR_MUL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_REDUCE_TENSOR_MIN", {"HIPDNN_REDUCE_TENSOR_MIN", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_REDUCE_TENSOR_MAX", {"HIPDNN_REDUCE_TENSOR_MAX", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_REDUCE_TENSOR_AMAX", {"HIPDNN_REDUCE_TENSOR_AMAX", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_REDUCE_TENSOR_AVG", {"HIPDNN_REDUCE_TENSOR_AVG", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"CUDNN_REDUCE_TENSOR_NORM1", {"HIPDNN_REDUCE_TENSOR_NORM1", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 + {"CUDNN_REDUCE_TENSOR_NORM2", {"HIPDNN_REDUCE_TENSOR_NORM2", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 + {"CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", {"HIPDNN_REDUCE_TENSOR_MUL_NO_ZEROS", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 + + {"cudnnReduceTensorIndices_t", {"hipdnnReduceTensorIndices_t", CONV_TYPE, API_DNN}}, + {"CUDNN_REDUCE_TENSOR_NO_INDICES", {"HIPDNN_REDUCE_TENSOR_NO_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", {"HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + + {"cudnnIndicesType_t", {"cudnnIndicesType_t", CONV_TYPE, API_DNN}}, + {"CUDNN_32BIT_INDICES", {"CUDNN_32BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_64BIT_INDICES", {"CUDNN_64BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_16BIT_INDICES", {"CUDNN_16BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_8BIT_INDICES", {"CUDNN_8BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 {"cudnnGetVersion", {"hipdnnGetVersion", CONV_VERSION, API_DNN}}, {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetProperty", {"hipdnnGetProperty", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetErrorString", {"hipdnnGetErrorString", CONV_ERROR, API_DNN}}, {"cudnnCreate", {"hipdnnCreate", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreateTensorDescriptor", {"hipdnnCreateTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateDropoutDescriptor", {"hipdnnCreateDropoutDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateReduceTensorDescriptor", {"hipdnnCreateReduceTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetReduceTensorDescriptor", {"hipdnnSetReduceTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetReduceTensorDescriptor", {"hipdnnGetReduceTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetReductionIndicesSize", {"hipdnnGetReductionIndicesSize", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetReductionWorkspaceSize", {"hipdnnGetReductionWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateOpTensorDescriptor", {"hipdnnCreateOpTensorDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetOpTensorDescriptor", {"hipdnnSetOpTensorDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetOpTensorDescriptor", {"hipdnnGetOpTensorDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCreateRNNDescriptor", {"hipdnnCreateRNNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetStream", {"hipdnnSetStream", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetStream", {"hipdnnGetStream", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetRNNDescriptor_v6", {"hipdnnSetRNNDescriptor_v6", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetRNNDescriptor", {"hipdnnSetRNNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDropoutGetStatesSize", {"hipdnnDropoutGetStatesSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnTransformTensor", {"hipdnnTransformTensor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetTensor4dDescriptor", {"hipdnnSetTensor4dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetTensor4dDescriptor", {"hipdnnGetTensor4dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnAddTensor", {"hipdnnAddTensor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnOpTensor", {"hipdnnOpTensor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetTensorSizeInBytes", {"hipdnnGetTensorSizeInBytes", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetTensor4dDescriptorEx", {"hipdnnSetTensor4dDescriptorEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetTensorNdDescriptor", {"hipdnnSetTensorNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetTensorNdDescriptor", {"hipdnnGetTensorNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetTensorNdDescriptorEx", {"hipdnnSetTensorNdDescriptorEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFindConvolutionForwardAlgorithm", {"hipdnnFindConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnConvolutionBackwardFilter", {"hipdnnConvolutionBackwardFilter", CONV_MATH_FUNC, API_DNN}}, + {"cudnnConvolutionBackwardData", {"hipdnnConvolutionBackwardData", CONV_MATH_FUNC, API_DNN}}, + {"cudnnFindConvolutionBackwardFilterAlgorithm", {"hipdnnFindConvolutionBackwardFilterAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionBackwardFilterAlgorithm", {"hipdnnGetConvolutionBackwardFilterAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionBackwardFilterWorkspaceSize",{"hipdnnGetConvolutionBackwardFilterWorkspaceSize",CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionBackwardDataWorkspaceSize", {"hipdnnGetConvolutionBackwardDataWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionBackwardDataAlgorithm", {"hipdnnGetConvolutionBackwardDataAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetRNNLinLayerMatrixParams", {"hipdnnGetRNNLinLayerMatrixParams", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetRNNLinLayerBiasParams", {"hipdnnGetRNNLinLayerBiasParams", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetFilterNdDescriptor", {"hipdnnGetFilterNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnFindConvolutionBackwardDataAlgorithm", {"hipdnnFindConvolutionBackwardDataAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetDropoutDescriptor", {"hipdnnSetDropoutDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetConvolutionMathType", {"hipdnnSetConvolutionMathType", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreateFilterDescriptor", {"hipdnnCreateFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreatePersistentRNNPlan", {"hipdnnCreatePersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetPersistentRNNPlan", {"hipdnnSetPersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, + {"cudnnRNNForwardInference", {"hipdnnRNNForwardInference", CONV_MATH_FUNC, API_DNN}}, + {"cudnnRNNBackwardWeights", {"hipdnnRNNBackwardWeights", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetRNNParamsSize", {"hipdnnGetRNNParamsSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetRNNWorkspaceSize", {"hipdnnGetRNNWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetRNNTrainingReserveSize", {"hipdnnGetRNNTrainingReserveSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetFilterNdDescriptor", {"hipdnnSetFilterNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnRNNForwardTraining", {"hipdnnRNNForwardTraining", CONV_MATH_FUNC, API_DNN}}, + {"cudnnRNNBackwardData", {"hipdnnRNNBackwardData", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetFilter4dDescriptor", {"hipdnnSetFilter4dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetRNNMatrixMathType", {"hipdnnSetRNNMatrixMathType", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}}, {"cudnnConvolutionForward", {"hipdnnConvolutionForward", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionForwardWorkspaceSize", {"hipdnnGetConvolutionForwardWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyTensorDescriptor", {"hipdnnDestroyTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyOpTensorDescriptor", {"hipdnnDestroyOpTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyConvolutionDescriptor", {"hipdnnDestroyConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyDropoutDescriptor", {"hipdnnDestroyDropoutDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, - {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyRNNDescriptor", {"hipdnnDestroyRNNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyReduceTensorDescriptor", {"hipdnnDestroyReduceTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyPersistentRNNPlan", {"hipdnnDestroyPersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroy", {"hipdnnDestroy", CONV_MATH_FUNC, API_DNN}}, }; From b898049412a1e101358118d75c04c110fdd340f4 Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Wed, 18 Apr 2018 15:27:56 -0400 Subject: [PATCH 07/49] initial gfx906 support --- CMakeLists.txt | 2 +- bin/hipcc | 21 +++++++++++++++++++++ lpl_ca/ca.hpp | 4 ++-- lpl_ca/common.hpp | 4 ++-- lpl_ca/lpl.hpp | 4 ++-- tests/src/deviceLib/hipTestHalf.cpp | 2 +- 6 files changed, 29 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 075b916a4d..4f770b3429 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -201,7 +201,7 @@ if(HIP_PLATFORM STREQUAL "hcc") execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906") if(COMPILE_HIP_ATP_MARKER) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger") endif() diff --git a/bin/hipcc b/bin/hipcc index f796b0bf95..b574ed29bd 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -75,6 +75,7 @@ $target_gfx801 = 0; $target_gfx802 = 0; $target_gfx803 = 0; $target_gfx900 = 0; +$target_gfx906 = 0; $default_amdgpu_target = 1; if ($HIP_PLATFORM eq "hcc") { @@ -281,6 +282,12 @@ foreach $arg (@ARGV) $target_gfx900 = 1; $default_amdgpu_target = 0; } + if($arg eq '--amdgpu-target=gfx906') + { + $target_gfx906 = 1; + $default_amdgpu_target = 0; + } + if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0)) { @@ -373,6 +380,11 @@ if($HIP_PLATFORM eq "hcc"){ $target_gfx900 = 1; $default_amdgpu_target = 0; } + if($target eq 'gfx906') + { + $target_gfx906 = 1; + $default_amdgpu_target = 0; + } } } # Else try using rocm_agent_enumerator @@ -404,6 +416,10 @@ if($HIP_PLATFORM eq "hcc"){ $target_gfx900 = 1; $default_amdgpu_target = 0; } + if($val eq "gfx906") { + $target_gfx906 = 1; + $default_amdgpu_target = 0; + } } } # rocm_agent_enumerator failed! Throw an error and die if linking is required @@ -437,6 +453,11 @@ if($HIP_PLATFORM eq "hcc"){ $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 "; $ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; } + if ($target_gfx906 eq 1) { + $HIPLDFLAGS .= " --amdgpu-target=gfx906"; + $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 "; + $ENV{HCC_EXTRA_LIBRARIES_GFX906}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; + } } if ($hasC and $HIP_PLATFORM eq 'nvcc') { diff --git a/lpl_ca/ca.hpp b/lpl_ca/ca.hpp index bb1963bede..0ef8458c20 100644 --- a/lpl_ca/ca.hpp +++ b/lpl_ca/ca.hpp @@ -23,7 +23,7 @@ inline clara::Parser cmdline_parser(bool& help, std::vector& inputs "https://reviews.llvm.org/D13909; " "the code object format is documented at: " "https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") | - clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"]( + clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"]( "targets for which code objects are to be extracted from " "the fat binary; must be included in the set of processors " "with ROCm support from " @@ -76,4 +76,4 @@ inline void validate_inputs(const std::vector& inputs) { throw std::runtime_error{"Non existent file " + *it + " passed as input."}; } } -} // namespace hip_impl \ No newline at end of file +} // namespace hip_impl diff --git a/lpl_ca/common.hpp b/lpl_ca/common.hpp index 74f4db9221..7652f08c46 100644 --- a/lpl_ca/common.hpp +++ b/lpl_ca/common.hpp @@ -12,7 +12,7 @@ namespace hip_impl { inline const std::unordered_set& amdgpu_targets() { // The evolving list lives at: // https://www.llvm.org/docs/AMDGPUUsage.html#processors. static const std::unordered_set r{"gfx701", "gfx801", "gfx802", "gfx803", - "gfx900"}; + "gfx900", "gfx906"}; return r; } @@ -77,4 +77,4 @@ inline void validate_targets(const std::vector& x) { } } } -} // Namespace hip_impl. \ No newline at end of file +} // Namespace hip_impl. diff --git a/lpl_ca/lpl.hpp b/lpl_ca/lpl.hpp index c9346fd793..cbd7fe8386 100644 --- a/lpl_ca/lpl.hpp +++ b/lpl_ca/lpl.hpp @@ -132,9 +132,9 @@ inline clara::Parser cmdline_parser(bool& help, std::vector& source "file is documented at: https://reviews.llvm.org/D13909.") | clara::Arg{sources, "a.cpp b.cpp etc."}("inputs for compilation; must contain valid C++ code.") | - clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"]( + clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"]( "targets for AMDGPU lowering; must be included in the set " "of processors with ROCm support from " "https://www.llvm.org/docs/AMDGPUUsage.html#processors."); } -} // namespace hip_impl \ No newline at end of file +} // namespace hip_impl diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp index 4a63260bf7..24a4d6c53e 100644 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ b/tests/src/deviceLib/hipTestHalf.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. #define HALF_SIZE 64 * sizeof(__half) #define HALF2_SIZE 64 * sizeof(__half2) -#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ __global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) { int tx = threadIdx.x; From 13274ce559551f2bbcdff10086a4734971aa8ef6 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 11 May 2018 03:35:10 +0100 Subject: [PATCH 08/49] Add support for the hipFuncGetAttributes interface. --- include/hip/hcc_detail/hip_runtime_api.h | 24 ++++++ include/hip/hcc_detail/program_state.hpp | 46 +++++++++-- src/functional_grid_launch.inl | 4 +- src/hip_module.cpp | 79 ++++++++++++++++--- src/program_state.cpp | 3 +- .../module/hipFuncGetAttributes.cpp | 53 +++++++++++++ 6 files changed, 189 insertions(+), 20 deletions(-) create mode 100644 tests/src/runtimeApi/module/hipFuncGetAttributes.cpp diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 8becab3c9c..fa39a766bc 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -94,6 +94,19 @@ typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; +struct hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +}; + typedef struct ihipEvent_t* hipEvent_t; enum hipLimit_t { @@ -2222,6 +2235,17 @@ hipError_t hipModuleUnload(hipModule_t module); */ hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); +/** + * @bried Find out attributes for a given function. + * + * @param [out] attr + * @param [in] func + * + * @returns hipSuccess, hipErrorInvalidDeviceFunction + */ + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func); + /** * @brief returns device memory pointer and size of the kernel present in the module with symbol @p * name diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index b6ccafb205..ac689fdb89 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -22,8 +22,10 @@ THE SOFTWARE. #pragma once +#include #include #include +#include #include #include @@ -46,11 +48,45 @@ struct hash { inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; } namespace hip_impl { -struct Kernel_descriptor { - std::uint64_t kernel_object_; - std::uint32_t group_size_; - std::uint32_t private_size_; - std::string name_; +class Kernel_descriptor { + std::uint64_t kernel_object_{}; + amd_kernel_code_t const* kernel_header_{nullptr}; + std::string name_{}; +public: + Kernel_descriptor() = default; + Kernel_descriptor(std::uint64_t kernel_object, const std::string& name) + : kernel_object_{kernel_object}, name_{name} + { + bool supported{false}; + std::uint16_t min_v{UINT16_MAX}; + auto r = hsa_system_major_extension_supported( + HSA_EXTENSION_AMD_LOADER, 1, &min_v, &supported); + + if (r != HSA_STATUS_SUCCESS || !supported) return; + + hsa_ven_amd_loader_1_01_pfn_t tbl{}; + + r = hsa_system_get_major_extension_table( + HSA_EXTENSION_AMD_LOADER, + 1, + sizeof(tbl), + reinterpret_cast(&tbl)); + + if (r != HSA_STATUS_SUCCESS) return; + if (!tbl.hsa_ven_amd_loader_query_host_address) return; + + r = tbl.hsa_ven_amd_loader_query_host_address( + reinterpret_cast(kernel_object_), + reinterpret_cast(&kernel_header_)); + + if (r != HSA_STATUS_SUCCESS) return; + } + Kernel_descriptor(const Kernel_descriptor&) = default; + Kernel_descriptor(Kernel_descriptor&&) = default; + ~Kernel_descriptor() = default; + + Kernel_descriptor& operator=(const Kernel_descriptor&) = default; + Kernel_descriptor& operator=(Kernel_descriptor&&) = default; operator hipFunction_t() const { // TODO: this is awful and only meant for illustration. return reinterpret_cast(const_cast(this)); diff --git a/src/functional_grid_launch.inl b/src/functional_grid_launch.inl index b555967ebc..9ecad51476 100644 --- a/src/functional_grid_launch.inl +++ b/src/functional_grid_launch.inl @@ -107,8 +107,8 @@ namespace hip_impl it0->second.cbegin(), it0->second.cend(), [=](const pair& x) { - return x.first.handle == agent.handle; - }); + return x.first == agent; + }); if (it1 == it0->second.cend()) { throw runtime_error{ diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 84451a17a0..59831958bd 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -27,10 +27,11 @@ THE SOFTWARE. #include "hsa_helpers.hpp" #include "trace_helper.h" +#include #include #include -#include +#include #include #include #include @@ -71,9 +72,8 @@ struct ihipKernArgInfo { map kernelArguments; struct ihipModuleSymbol_t { - uint64_t _object; // The kernel object. - uint32_t _groupSegmentSize; - uint32_t _privateSegmentSize; + uint64_t _object{}; // The kernel object. + amd_kernel_code_t const* _header{}; string _name; // TODO - review for performance cost. Name is just used for debug. }; @@ -179,8 +179,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, aql.grid_size_x = globalWorkSizeX; aql.grid_size_y = globalWorkSizeY; aql.grid_size_z = globalWorkSizeZ; - aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes; - aql.private_segment_size = f->_privateSegmentSize; + aql.group_segment_size = + f->_header->workgroup_group_segment_byte_size + sharedMemBytes; + aql.private_segment_size = + f->_header->workitem_private_segment_byte_size; aql.kernel_object = f->_object; aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; aql.header = @@ -444,10 +446,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch if (kernel.handle == 0u) return hipErrorNotFound; - (*func)->_object = kernel_object(kernel); - (*func)->_groupSegmentSize = group_size(kernel); - (*func)->_privateSegmentSize = private_size(kernel); - (*func)->_name = name; + // TODO: refactor the whole ihipThisThat, which is a mess and yields the + // below, due to hipFunction_t being a pointer to ihipModuleSymbol_t. + func[0][0] = *static_cast( + Kernel_descriptor{kernel_object(kernel), name}); return hipSuccess; } @@ -471,6 +473,61 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h return ihipLogStatus(r); } +namespace +{ + inline + hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) + { + hipFuncAttributes r{}; + + hipDeviceProp_t prop{}; + hipGetDeviceProperties( + &prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + + r.localSizeBytes = header.workitem_private_segment_byte_size; + r.sharedSizeBytes = header.workgroup_group_segment_byte_size; + r.maxDynamicSharedSizeBytes = + prop.sharedMemPerBlock - r.sharedSizeBytes; + r.numRegs = header.workitem_vgpr_count; + r.maxThreadsPerBlock = r.numRegs ? // TODO: proper query. + std::min(prop.maxThreadsPerBlock, 64 * 1024 / r.numRegs) : + prop.maxThreadsPerBlock; + r.binaryVersion = + header.amd_machine_version_major * 10 + + header.amd_machine_version_minor; + r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. + + return r; + } +} + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) +{ + if (!func) return hipErrorInvalidDeviceFunction; + + const auto it0 = functions().find(reinterpret_cast(func)); + + if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction; + + auto agent = this_agent(); + const auto it1 = find_if( + it0->second.cbegin(), + it0->second.cend(), + [=](const pair& x) { + return x.first == agent; + }); + + if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction; + + const auto header = static_cast(it1->second)->_header; + + if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."}; + + *attr = make_function_attributes(*header); + + return hipSuccess; +} + hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { if (!module) return hipErrorInvalidValue; @@ -487,7 +544,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { (*module)->executable = hip_impl::load_executable( tmp.empty() ? read_elf_file_as_string(image) : tmp, (*module)->executable, this_agent()); - + return (*module)->executable.handle ? hipSuccess : hipErrorUnknown; } diff --git a/src/program_state.cpp b/src/program_state.cpp index 0c51fe4694..c4478bec2f 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -382,8 +382,7 @@ const unordered_map>>& fu for (auto&& kernel_symbol : it->second) { r[function.first].emplace_back( agent(kernel_symbol), - Kernel_descriptor{kernel_object(kernel_symbol), group_size(kernel_symbol), - private_size(kernel_symbol), it->first}); + Kernel_descriptor{kernel_object(kernel_symbol), it->first}); } } } diff --git a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp new file mode 100644 index 0000000000..7d3eff5f73 --- /dev/null +++ b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp @@ -0,0 +1,53 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include +#include +#include "test_common.h" + +__global__ +void fn(float* px, float* py) +{ + bool a[42]; + __shared__ double b[69]; + + for (auto&& x : b) x = *py++; + for (auto&& x : a) x = *px++ > 0.0; + for (auto&& x : a) if (x) *--py = *--px; +} + +int main() { + hipInit(0); + + hipFuncAttributes attr{}; + + auto r = hipFuncGetAttributes(&attr, reinterpret_cast(&fn)); + + if (r != hipSuccess || attr.maxThreadsPerBlock == 0) { + failed("Failed to read attributes."); + } + + passed(); +} From 1ba8a35dbad3c8ad26f0a35a5fe5dc7cf3cf1aa6 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 11 May 2018 11:31:46 +0100 Subject: [PATCH 09/49] Don't use magic constants, they're evil. Also clarify that the register count cannot be queried at the moment. --- src/hip_module.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 59831958bd..b174b7ad5b 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -483,14 +483,17 @@ namespace hipDeviceProp_t prop{}; hipGetDeviceProperties( &prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); - + // TODO: at the moment there is no way to query the count of registers + // available per CU, therefore we hardcode it to 64 KiRegisters. + prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024. + r.localSizeBytes = header.workitem_private_segment_byte_size; r.sharedSizeBytes = header.workgroup_group_segment_byte_size; r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes; r.numRegs = header.workitem_vgpr_count; - r.maxThreadsPerBlock = r.numRegs ? // TODO: proper query. - std::min(prop.maxThreadsPerBlock, 64 * 1024 / r.numRegs) : + r.maxThreadsPerBlock = r.numRegs ? + std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : prop.maxThreadsPerBlock; r.binaryVersion = header.amd_machine_version_major * 10 + From 848a24b5246f662f5b488fe428c8b414ec708dda Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 11 May 2018 10:53:07 -0400 Subject: [PATCH 10/49] Fix hipMathFunction for gfx906 --- tests/src/deviceLib/hipMathFunctions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp index 7fe0003672..78e85ba62a 100644 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ b/tests/src/deviceLib/hipMathFunctions.cpp @@ -29,7 +29,7 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "test_common.h" -#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ +#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__ __global__ void kernel_abs_int64(hipLaunchParm lp, long long *input, long long *output) { int tx = threadIdx.x; From 79480d7cbddfa44e226e223985a12b503a6f0697 Mon Sep 17 00:00:00 2001 From: 949f45ac <949f45ac@googlemail.com> Date: Mon, 14 May 2018 08:34:56 +0200 Subject: [PATCH 11/49] Provide correct __mul64hi and __umul64hi builtins, using code from ROCm-Device-Libs --- src/device_functions.cpp | 54 +++++++++++++++------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/src/device_functions.cpp b/src/device_functions.cpp index 86d0530817..fe4951ec5a 100644 --- a/src/device_functions.cpp +++ b/src/device_functions.cpp @@ -276,28 +276,6 @@ struct ucharHolder { }; } __attribute__((aligned(4))); -struct uchar2Holder { - union { - unsigned int ui[2]; - unsigned char c[8]; - }; -} __attribute__((aligned(8))); - -struct intHolder { - union { - signed int si[2]; - signed int long sl; - }; -} __attribute__((aligned(8))); - -struct uintHolder { - union { - signed int ui[2]; - signed int long ul; - }; -} __attribute__((aligned(8))); - - __device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { struct uchar2Holder cHoldVal; struct ucharHolder cHoldKey; @@ -313,21 +291,29 @@ __device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int } __device__ long long __mul64hi(long long int x, long long int y) { - struct intHolder iHold1; - struct intHolder iHold2; - iHold1.sl = x; - iHold2.sl = y; - iHold1.sl = iHold1.si[1] * iHold2.si[1]; - return iHold1.sl; + ulong x0 = (ulong)x & 0xffffffffUL; + long x1 = x >> 32; + ulong y0 = (ulong)y & 0xffffffffUL; + long y1 = y >> 32; + ulong z0 = x0*y0; + long t = x1*y0 + (z0 >> 32); + long z1 = t & 0xffffffffL; + long z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); } __device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) { - struct uintHolder uHold1; - struct uintHolder uHold2; - uHold1.ul = x; - uHold2.ul = y; - uHold1.ul = uHold1.ui[1] * uHold2.ui[1]; - return uHold1.ul; + ulong x0 = x & 0xffffffffUL; + ulong x1 = x >> 32; + ulong y0 = y & 0xffffffffUL; + ulong y1 = y >> 32; + ulong z0 = x0*y0; + ulong t = x1*y0 + (z0 >> 32); + ulong z1 = t & 0xffffffffUL; + ulong z2 = t >> 32; + z1 = x0*y1 + z1; + return x1*y1 + z2 + (z1 >> 32); } /* From b0fd0c310d516227d2b58f5959bfa2fe599f37a3 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 14 May 2018 16:23:59 +0300 Subject: [PATCH 12/49] [HIPIFY][DNN] support of cuDNN 7.1.3 - continuation 2 - not finished yet. - based on https://github.com/ROCmSoftwarePlatform/hipDNN. - testing on https://github.com/baidu-research/DeepBench - almost pass, except cusparse (not supported yet). - started testing of examples from libcudnn7-dev_7.1.3.16-1+cuda8.0_amd64 package. --- hipify-clang/src/CUDA2HipMap.cpp | 69 +++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/hipify-clang/src/CUDA2HipMap.cpp b/hipify-clang/src/CUDA2HipMap.cpp index fd8446e689..6a0dfd6ed9 100644 --- a/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipify-clang/src/CUDA2HipMap.cpp @@ -3009,6 +3009,7 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnReduceTensorStruct", {"hipdnnReduceTensorStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnCTCLossDescriptor_t", {"hipdnnCTCLossDescriptor_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnCTCLossStruct", {"hipdnnCTCLossStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnConvolutionBwdDataAlgoPerf_t", {"hipdnnConvolutionBwdDataAlgoPerf_t", CONV_TYPE, API_DNN}}, {"cudnnRNNInputMode_t", {"hipdnnRNNInputMode_t", CONV_TYPE, API_DNN}}, {"CUDNN_LINEAR_INPUT", {"HIPDNN_LINEAR_INPUT", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 @@ -3085,17 +3086,38 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_REDUCE_TENSOR_NO_INDICES", {"HIPDNN_REDUCE_TENSOR_NO_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", {"HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnIndicesType_t", {"cudnnIndicesType_t", CONV_TYPE, API_DNN}}, - {"CUDNN_32BIT_INDICES", {"CUDNN_32BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_64BIT_INDICES", {"CUDNN_64BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_16BIT_INDICES", {"CUDNN_16BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_8BIT_INDICES", {"CUDNN_8BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"cudnnConvolutionBwdDataPreference_t", {"hipdnnConvolutionBwdDataPreference_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + + {"cudnnIndicesType_t", {"hipdnnIndicesType_t", CONV_TYPE, API_DNN}}, + {"CUDNN_32BIT_INDICES", {"HIPDNN_32BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_64BIT_INDICES", {"HIPDNN_64BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_16BIT_INDICES", {"HIPDNN_16BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_8BIT_INDICES", {"HIPDNN_8BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + + {"cudnnSoftmaxAlgorithm_t", {"hipdnnSoftmaxAlgorithm_t", CONV_TYPE, API_DNN}}, + {"CUDNN_SOFTMAX_FAST", {"HIPDNN_SOFTMAX_FAST", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_SOFTMAX_ACCURATE", {"HIPDNN_SOFTMAX_ACCURATE", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_SOFTMAX_LOG", {"HIPDNN_SOFTMAX_LOG", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + + {"cudnnSoftmaxMode_t", {"hipdnnSoftmaxMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_SOFTMAX_MODE_INSTANCE", {"HIPDNN_SOFTMAX_MODE_INSTANCE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_SOFTMAX_MODE_CHANNEL", {"HIPDNN_SOFTMAX_MODE_CHANNEL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + + {"cudnnPoolingMode_t", {"hipdnnPoolingMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_POOLING_MAX", {"HIPDNN_POOLING_MAX", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_POOLING_MAX_DETERMINISTIC", {"HIPDNN_POOLING_MAX_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 {"cudnnGetVersion", {"hipdnnGetVersion", CONV_VERSION, API_DNN}}, {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, {"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetProperty", {"hipdnnGetProperty", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetErrorString", {"hipdnnGetErrorString", CONV_ERROR, API_DNN}}, + {"cudnnCreate", {"hipdnnCreate", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreateTensorDescriptor", {"hipdnnCreateTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreateDropoutDescriptor", {"hipdnnCreateDropoutDescriptor", CONV_MATH_FUNC, API_DNN}}, @@ -3124,21 +3146,35 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnGetTensorNdDescriptor", {"hipdnnGetTensorNdDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetTensorNdDescriptorEx", {"hipdnnSetTensorNdDescriptorEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnFindConvolutionForwardAlgorithm", {"hipdnnFindConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnFindConvolutionForwardAlgorithmEx", {"hipdnnFindConvolutionForwardAlgorithmEx", CONV_MATH_FUNC, API_DNN}}, {"cudnnConvolutionBackwardFilter", {"hipdnnConvolutionBackwardFilter", CONV_MATH_FUNC, API_DNN}}, {"cudnnConvolutionBackwardData", {"hipdnnConvolutionBackwardData", CONV_MATH_FUNC, API_DNN}}, {"cudnnFindConvolutionBackwardFilterAlgorithm", {"hipdnnFindConvolutionBackwardFilterAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnFindConvolutionBackwardFilterAlgorithmEx", {"hipdnnFindConvolutionBackwardFilterAlgorithmEx", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionBackwardFilterAlgorithm", {"hipdnnGetConvolutionBackwardFilterAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionBackwardFilterAlgorithm_v7", {"hipdnnGetConvolutionBackwardFilterAlgorithm_v7", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetConvolutionBackwardFilterWorkspaceSize",{"hipdnnGetConvolutionBackwardFilterWorkspaceSize",CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionBackwardDataWorkspaceSize", {"hipdnnGetConvolutionBackwardDataWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionBackwardDataAlgorithm", {"hipdnnGetConvolutionBackwardDataAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionBackwardDataAlgorithm_v7", {"hipdnnGetConvolutionBackwardDataAlgorithm_v7", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetConvolutionBackwardDataAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardDataAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetConvolutionForwardAlgorithmMaxCount", {"hipdnnGetConvolutionForwardAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetRNNLinLayerMatrixParams", {"hipdnnGetRNNLinLayerMatrixParams", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetRNNLinLayerBiasParams", {"hipdnnGetRNNLinLayerBiasParams", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetFilterNdDescriptor", {"hipdnnGetFilterNdDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnFindConvolutionBackwardDataAlgorithm", {"hipdnnFindConvolutionBackwardDataAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnFindConvolutionBackwardDataAlgorithmEx", {"hipdnnFindConvolutionBackwardDataAlgorithmEx", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetDropoutDescriptor", {"hipdnnSetDropoutDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolution2dDescriptor", {"hipdnnGetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetConvolutionMathType", {"hipdnnSetConvolutionMathType", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionMathType", {"hipdnnGetConvolutionMathType", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetConvolutionGroupCount", {"hipdnnSetConvolutionGroupCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetConvolutionGroupCount", {"hipdnnGetConvolutionGroupCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetConvolutionNdDescriptor", {"hipdnnSetConvolutionNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionNdDescriptor", {"hipdnnGetConvolutionNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionNdForwardOutputDim", {"hipdnnGetConvolutionNdForwardOutputDim", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnCreateFilterDescriptor", {"hipdnnCreateFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreatePersistentRNNPlan", {"hipdnnCreatePersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetPersistentRNNPlan", {"hipdnnSetPersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, @@ -3154,8 +3190,16 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnSetRNNMatrixMathType", {"hipdnnSetRNNMatrixMathType", CONV_MATH_FUNC, API_DNN}}, {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionForwardAlgorithm_v7", {"hipdnnGetConvolutionForwardAlgorithm_v7", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnConvolutionForward", {"hipdnnConvolutionForward", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionForwardWorkspaceSize", {"hipdnnGetConvolutionForwardWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnConvolutionBiasActivationForward", {"hipdnnConvolutionBiasActivationForward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardFilterAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnConvolutionBackwardBias", {"hipdnnConvolutionBackwardBias", CONV_MATH_FUNC, API_DNN}}, + {"cudnnReduceTensor", {"hipdnnReduceTensor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetTensor", {"hipdnnSetTensor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnScaleTensor", {"hipdnnScaleTensor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnIm2Col", {"hipdnnIm2Col", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnDestroyTensorDescriptor", {"hipdnnDestroyTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyOpTensorDescriptor", {"hipdnnDestroyOpTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyConvolutionDescriptor", {"hipdnnDestroyConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, @@ -3166,6 +3210,21 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnDestroyPersistentRNNPlan", {"hipdnnDestroyPersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroy", {"hipdnnDestroy", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSoftmaxForward", {"hipdnnSoftmaxForward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSoftmaxBackward", {"hipdnnSoftmaxBackward", CONV_MATH_FUNC, API_DNN}}, + + {"cudnnCreatePoolingDescriptor", {"hipdnnCreatePoolingDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetPooling2dDescriptor", {"hipdnnSetPooling2dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetPooling2dDescriptor", {"hipdnnGetPooling2dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetPoolingNdDescriptor", {"hipdnnSetPoolingNdDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetPoolingNdDescriptor", {"hipdnnGetPoolingNdDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetPoolingNdForwardOutputDim", {"hipdnnGetPoolingNdForwardOutputDim", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetPooling2dForwardOutputDim", {"hipdnnGetPooling2dForwardOutputDim", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyPoolingDescriptor", {"hipdnnDestroyPoolingDescriptor", CONV_MATH_FUNC, API_DNN}}, + + {"cudnnPoolingForward", {"hipdnnPoolingForward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnPoolingBackward", {"hipdnnPoolingBackward", CONV_MATH_FUNC, API_DNN}}, + }; const std::map& CUDA_RENAMES_MAP() { From 5325b6535e9d38ad0ef249cd928f12dd0a95e1d1 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 14 May 2018 17:15:36 +0100 Subject: [PATCH 13/49] Update hip_module.cpp --- src/hip_module.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index b174b7ad5b..e4b050af19 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -506,6 +506,7 @@ namespace hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { + if (!attr) return hipErrorInvalidValue; if (!func) return hipErrorInvalidDeviceFunction; const auto it0 = functions().find(reinterpret_cast(func)); From 8f010ac68e57f9fbef997ed642253561ac19ba7a Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 16 May 2018 11:07:10 +0530 Subject: [PATCH 14/49] Fixed hipMemcpy2D to handle 1D memcpy case --- src/hip_memory.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 48a8da81ca..cd744c8cc0 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1261,21 +1261,24 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h // TODO - review and optimize hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { - if (width > dpitch || width > spitch) return hipErrorUnknown; + if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); hc::completion_future marker; hipError_t e = hipSuccess; - - try { - for (int i = 0; i < height; ++i) { - stream->locked_copySync((unsigned char*)dst + i * dpitch, + if((width == dpitch) && (width == spitch)) { + stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false); + } else { + try { + for (int i = 0; i < height; ++i) { + stream->locked_copySync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind); + } + } catch (ihipException& ex) { + e = ex._code; } - } catch (ihipException& ex) { - e = ex._code; } return e; @@ -1303,15 +1306,19 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); - if (width > dpitch || width > spitch) return ihipLogStatus(hipErrorUnknown); + if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue); hipError_t e = hipSuccess; - try { - for (int i = 0; i < height; ++i) { - e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, + if((width == dpitch) && (width == spitch)) { + hip_internal::memcpyAsync(dst, src, width*height, kind, stream); + } else { + try { + for (int i = 0; i < height; ++i) { + e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind, stream); + } + } catch (ihipException& ex) { + e = ex._code; } - } catch (ihipException& ex) { - e = ex._code; } return ihipLogStatus(e); From dc4d305c25b8c894f91fc90bc76c794aa492a413 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 16 May 2018 15:01:32 +0530 Subject: [PATCH 15/49] Fix hipCommander Makefile --- samples/1_Utils/hipCommander/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/1_Utils/hipCommander/Makefile b/samples/1_Utils/hipCommander/Makefile index a411763b7f..c3a19073f0 100644 --- a/samples/1_Utils/hipCommander/Makefile +++ b/samples/1_Utils/hipCommander/Makefile @@ -19,7 +19,7 @@ $(EXE): hipCommander.cpp $(HIPCC) $(CXXFLAGS) $^ -o $@ nullkernel.hsaco : nullkernel.hip.cpp - $(HIPCC) --genco nullkernel.hip -o nullkernel.hsaco + $(HIPCC) --genco nullkernel.hip.cpp -o nullkernel.hsaco install: $(EXE) From 8303bfdffdfb5248c34d62d004821f0f8270eba9 Mon Sep 17 00:00:00 2001 From: 949f45ac <949f45ac@googlemail.com> Date: Thu, 17 May 2018 10:55:45 +0200 Subject: [PATCH 16/49] Reinstate accidentally deleted uchar2Holder --- src/device_functions.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/device_functions.cpp b/src/device_functions.cpp index fe4951ec5a..879d354337 100644 --- a/src/device_functions.cpp +++ b/src/device_functions.cpp @@ -276,6 +276,13 @@ struct ucharHolder { }; } __attribute__((aligned(4))); +struct uchar2Holder { + union { + unsigned int ui[2]; + unsigned char c[8]; + }; +} __attribute__((aligned(8))); + __device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) { struct uchar2Holder cHoldVal; struct ucharHolder cHoldKey; From f4d79a1615cd62053f9edc726bdec01563a3aa5b Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 17 May 2018 13:08:55 -0400 Subject: [PATCH 17/49] Let hipcc suport hip-clang --- bin/hipcc | 61 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index f796b0bf95..dee0c9d4b9 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -50,6 +50,8 @@ $verbose = $ENV{'HIPCC_VERBOSE'} // 0; # Verbose: 0x1=commands, 0x2=paths, 0x4=hipcc args $HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc +$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'}; +$DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'}; #--- # Read .hipInfo @@ -62,6 +64,10 @@ $HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform` // "hcc"; $HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`; ($HIP_VERSION_MAJOR, $HIP_VERSION_MINOR, $HIP_VERSION_PATCH) = split(/\./, $HIP_VERSION); +if (defined $HIP_CLANG_PATH) { + $HIP_PLATFORM = "clang" +} + if ($verbose & 0x2) { print ("HIP_PATH=$HIP_PATH\n"); print ("HIP_PLATFORM=$HIP_PLATFORM\n"); @@ -77,7 +83,16 @@ $target_gfx803 = 0; $target_gfx900 = 0; $default_amdgpu_target = 1; -if ($HIP_PLATFORM eq "hcc") { +if ($HIP_PLATFORM eq "clang") { + if ($verbose & 0x2) { + print ("HIP_CLANG_PATH=$HIP_CLANG_PATH\n"); + } + $ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm"; + $HIPCC="$HIP_CLANG_PATH/clang++"; + $HIPCXXFLAGS .= " -I$HIP_PATH/include"; + + $HIPLDFLAGS = "--hip-link --hip-device-lib-path=$DEVICE_LIB_PATH -L$HIP_PATH/lib -lhip_hcc"; +} elsif ($HIP_PLATFORM eq "hcc") { $HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa"; $HCC_HOME=$ENV{'HCC_HOME'} // $hipConfig{'HCC_HOME'} // "/opt/rocm/hcc"; @@ -330,10 +345,16 @@ foreach $arg (@ARGV) if (($arg =~ /\.cpp$/) or ($arg =~ /\.c$/) or ($arg =~ /\.cc$/) ) { $hasC = 1; $needCXXFLAGS = 1; + if ($HIP_PLATFORM eq 'clang') { + $toolArgs .= " -x hip" + } } if (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/)) { $hasCU = 1; $needCXXFLAGS = 1; + if ($HIP_PLATFORM eq 'clang') { + $toolArgs .= " -x hip" + } } push (@inputs, $arg); @@ -342,7 +363,7 @@ foreach $arg (@ARGV) $toolArgs .= " $arg" unless $swallowArg; } -if($HIP_PLATFORM eq "hcc"){ +if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ # No AMDGPU target specified at commandline. So look for HCC_AMDGPU_TARGET if($default_amdgpu_target eq 1 and defined $ENV{HCC_AMDGPU_TARGET}) { @@ -414,26 +435,51 @@ if($HIP_PLATFORM eq "hcc"){ $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n"; + if($HIP_PLATFORM eq "hcc") { + $GPU_ARCH_OPT = " --amdgpu-target="; + } else { + $GPU_ARCH_OPT = " --cuda-gpu-arch="; + } # Handle ROCm target platform if ($target_gfx701 eq 1) { - $HIPLDFLAGS .= " --amdgpu-target=gfx701"; + $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx701"; + $HIPLDFLAGS .= $GPU_ARCH_ARG; + if ($HIP_PLATFORM eq 'clang') { + $HIPCXXFLAGS .= $GPU_ARCH_ARG;; + } $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX701__=1 "; } if ($target_gfx801 eq 1) { - $HIPLDFLAGS .= " --amdgpu-target=gfx801"; + $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx801"; + $HIPLDFLAGS .= $GPU_ARCH_ARG; + if ($HIP_PLATFORM eq 'clang') { + $HIPCXXFLAGS .= $GPU_ARCH_ARG;; + } $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX801__=1 "; } if ($target_gfx802 eq 1) { - $HIPLDFLAGS .= " --amdgpu-target=gfx802"; + $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx802"; + $HIPLDFLAGS .= $GPU_ARCH_ARG; + if ($HIP_PLATFORM eq 'clang') { + $HIPCXXFLAGS .= $GPU_ARCH_ARG;; + } $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX802__=1 "; } if ($target_gfx803 eq 1) { - $HIPLDFLAGS .= " --amdgpu-target=gfx803"; + $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx803"; + $HIPLDFLAGS .= $GPU_ARCH_ARG; + if ($HIP_PLATFORM eq 'clang') { + $HIPCXXFLAGS .= $GPU_ARCH_ARG;; + } $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX803__=1 "; $ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; } if ($target_gfx900 eq 1) { - $HIPLDFLAGS .= " --amdgpu-target=gfx900"; + $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx900"; + $HIPLDFLAGS .= $GPU_ARCH_ARG; + if ($HIP_PLATFORM eq 'clang') { + $HIPCXXFLAGS .= $GPU_ARCH_ARG;; + } $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 "; $ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n"; } @@ -445,6 +491,7 @@ if ($hasC and $HIP_PLATFORM eq 'nvcc') { if ($hasCU and $HIP_PLATFORM eq 'hcc') { $HIPCXXFLAGS .= " -x c++"; } + if ($buildDeps and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -M -D__CUDACC__"; } From d0794638872c9f5241fe48c5a37d67386658dfef Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 17 May 2018 17:04:51 -0400 Subject: [PATCH 18/49] Change HIP fat binary magic number --- src/hip_clang.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index 80b6111fc2..f7573e2819 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -27,10 +27,10 @@ THE SOFTWARE. #include "hip_hcc_internal.h" #include "trace_helper.h" -constexpr unsigned __cudaFatMAGIC2 = 0x466243b1; +constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" #define CLANG_OFFLOAD_BUNDLER_MAGIC "__CLANG_OFFLOAD_BUNDLE__" -#define AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa" +#define AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa" struct __ClangOffloadBundleDesc { uint64_t offset; @@ -59,7 +59,7 @@ __hipRegisterFatBinary(const void* data) HIP_INIT(); const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); - if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) { + if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { return nullptr; } From 1a108ef7f31908648493d59f2e4fd2ce1c893cea Mon Sep 17 00:00:00 2001 From: founta Date: Fri, 18 May 2018 09:11:50 -0400 Subject: [PATCH 19/49] defined hipPitchedPtr Added a define for hipPitchedPtr to resolve a compiler error --- include/hip/nvcc_detail/hip_runtime_api.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index f8e4e39136..92393ab2e0 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -160,6 +160,7 @@ typedef cudaSurfaceObject_t hipSurfaceObject_t; #define hipDeviceMapHost cudaDeviceMapHost #define hipExtent cudaExtent +#define hipPitchedPtr cudaPitchedPtr #define make_hipExtent make_cudaExtent #define make_hipPos make_cudaPos #define make_hipPitchedPtr make_cudaPitchedPtr From afe62e703012d19874649f202cf09f7e1c427400 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Fri, 18 May 2018 21:09:50 +0530 Subject: [PATCH 20/49] Fix for memcpy2DAsync for pinned host memory case --- src/hip_memory.cpp | 93 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 21 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 8f4b64c51e..e2202e7860 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1303,27 +1303,6 @@ hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { return ihipLogStatus(e); } -hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); - if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue); - hipError_t e = hipSuccess; - if((width == dpitch) && (width == spitch)) { - hip_internal::memcpyAsync(dst, src, width*height, kind, stream); - } else { - try { - for (int i = 0; i < height; ++i) { - e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, - (unsigned char*)src + i * spitch, width, kind, stream); - } - } catch (ihipException& ex) { - e = ex._code; - } - } - - return ihipLogStatus(e); -} - hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind); @@ -1554,6 +1533,19 @@ inline const T& clamp_integer(const T& x, const T& lower, const T& upper) { return std::min(upper, std::max(x, lower)); } + +template +__global__ void hip_copy_n(T* dst, const T* src, size_t n) { + const uint32_t grid_dim = gridDim.x * blockDim.x; + + size_t idx = blockIdx.x * block_dim + threadIdx.x; + while (idx < n) { + // __builtin_memcpy(reinterpret_cast(dst+idx), reinterpret_cast(src+idx), + // sizeof(T)); + dst[idx] = src[idx]; + idx += grid_dim; + } +} } // namespace template @@ -1566,6 +1558,16 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { sizeBytes, std::move(val)); } +template +void ihipMemcpyKernel(hipStream_t stream, T* dst, const T* src, size_t sizeBytes) { + static constexpr uint32_t block_dim_ = 256; + + const uint32_t grid_dim = clamp_integer(sizeBytes / block_dim_, 1, UINT32_MAX); + + hipLaunchKernelGGL(hip_copy_n, dim3(grid_dim), dim3{block_dim_}, 0u, stream, dst, src, + sizeBytes); +} + typedef enum ihipMemsetDataType { ihipMemsetDataTypeChar = 0, ihipMemsetDataTypeShort = 1, @@ -1623,6 +1625,55 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea return e; }; +int isLockedPointer(const void *ptr) +{ + hsa_amd_pointer_info_t info; + int isLocked = 0; + + info.size = sizeof(info); + hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast(ptr), &info, nullptr, nullptr, nullptr); + if(hsa_status != HSA_STATUS_SUCCESS) { + return -1; + } + + if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) { + isLocked = 1; + } + + return isLocked; +} + +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream) { + HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); + if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue); + hipError_t e = hipSuccess; + int isLocked = 0; + if(kind == hipMemcpyHostToDevice) { + isLocked = isLockedPointer(src); + } else if(kind == hipMemcpyDeviceToHost) { + isLocked = isLockedPointer(dst); + } + if((width == dpitch) && (width == spitch)) { + hip_internal::memcpyAsync(dst, src, width*height, kind, stream); + } else { + try { + for (int i = 0; i < height; ++i) { + if(!isLocked) { + e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, + (unsigned char*)src + i * spitch, width, kind, stream); + } else{ + size_t sizeBytes = width*height; + ihipMemcpyKernel (stream, static_cast (dst), static_cast (src), sizeBytes/sizeof(uint32_t)); + } + } + } catch (ihipException& ex) { + e = ex._code; + } + } + + return ihipLogStatus(e); +} // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { From cd6c979c278f7accb189a453dc226e392e38d74a Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 18 May 2018 17:50:45 +0100 Subject: [PATCH 21/49] Update hip_module.cpp Typo. --- src/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index e4b050af19..a6d486b6de 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -485,7 +485,7 @@ namespace &prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); // TODO: at the moment there is no way to query the count of registers // available per CU, therefore we hardcode it to 64 KiRegisters. - prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024. + prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; r.localSizeBytes = header.workitem_private_segment_byte_size; r.sharedSizeBytes = header.workgroup_group_segment_byte_size; From 661561eeadcd493cc8dddd3fd511cf75bee76007 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Mon, 21 May 2018 11:33:09 +0530 Subject: [PATCH 22/49] Add hipMalloc3D to nvcc detail Change-Id: I8a5654066ed1504e3b05eddbbdebf05fd52aa149 --- include/hip/nvcc_detail/hip_runtime_api.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 92393ab2e0..bd3ffc1cc1 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -382,6 +382,10 @@ inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height)); } +inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { + return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent)); +} + inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); } inline static hipError_t hipMallocHost(void** ptr, size_t size) From 305592d622a67e04a9cf80e60cc9bb55778179f4 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Mon, 21 May 2018 11:35:03 +0530 Subject: [PATCH 23/49] Disable incomplete unit tests that don't work on nvcc path Change-Id: If5823ec96a3b2497a08c46ab802c5a0158271053 --- tests/src/deviceLib/hipMathFunctions.cpp | 2 +- tests/src/deviceLib/hipTestHalf.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp index 78e85ba62a..f89bdae149 100644 --- a/tests/src/deviceLib/hipMathFunctions.cpp +++ b/tests/src/deviceLib/hipMathFunctions.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * RUN: %t * HIT_END */ diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp index 24a4d6c53e..5a2aac2b29 100644 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ b/tests/src/deviceLib/hipTestHalf.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc * RUN: %t * HIT_END */ From 0180a8296380deb51741adb532c9c94da678dca5 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Mon, 21 May 2018 15:38:44 +0530 Subject: [PATCH 24/49] hipMemcpy returns success if sizeBytes is 0. Fixes SWDEV-153754 & SWDEV-154178. --- src/hip_memory.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index e2202e7860..443e645097 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1132,15 +1132,19 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, kind); + hipError_t e = hipSuccess; + + // Return success if number of bytes to copy is 0 + if (sizeBytes == 0) return ihipLogStatus(e); + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); hc::completion_future marker; - hipError_t e = hipSuccess; if(dst==NULL || src==NULL) { e=hipErrorInvalidValue; - return e; + return ihipLogStatus(e); } try { stream->locked_copySync(dst, src, sizeBytes, kind); From a5df3a484c762b1485fbe0ae9acfdef69dc0166b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 21 May 2018 18:31:20 +0300 Subject: [PATCH 25/49] [HIPIFY][DNN] support of cuDNN 7.1.3 - finishing --- hipify-clang/src/CUDA2HipMap.cpp | 133 +++++++++++++++++++++++++------ 1 file changed, 109 insertions(+), 24 deletions(-) diff --git a/hipify-clang/src/CUDA2HipMap.cpp b/hipify-clang/src/CUDA2HipMap.cpp index 6a0dfd6ed9..7d10b35e48 100644 --- a/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipify-clang/src/CUDA2HipMap.cpp @@ -2923,6 +2923,14 @@ const std::map CUDA_IDENTIFIER_MAP{ // defines {"CUDNN_VERSION", {"HIPDNN_VERSION", CONV_NUMERIC_LITERAL, API_DNN}}, // 7000 {"CUDNN_DIM_MAX", {"HIPDNN_DIM_MAX", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 + {"CUDNN_LRN_MIN_N", {"HIPDNN_LRN_MIN_N", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_LRN_MAX_N", {"HIPDNN_LRN_MAX_N", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 + {"CUDNN_LRN_MIN_K", {"HIPDNN_LRN_MIN_K", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1e-5 + {"CUDNN_LRN_MIN_BETA", {"HIPDNN_LRN_MIN_BETA", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0.01 + {"CUDNN_BN_MIN_EPSILON", {"HIPDNN_BN_MIN_EPSILON", CONV_NUMERIC_LITERAL, API_DNN}}, // 1e-5 + {"CUDNN_SEV_ERROR_EN", {"HIPDNN_SEV_ERROR_EN", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_SEV_WARNING_EN", {"HIPDNN_SEV_WARNING_EN", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_SEV_INFO_EN", {"HIPDNN_SEV_INFO_EN", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, {"cudnnContext", {"hipdnnContext", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnHandle_t", {"hipdnnHandle_t", CONV_TYPE, API_DNN}}, @@ -2960,11 +2968,15 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_DATA_INT8x4", {"HIPDNN_DATA_INT8x4", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 {"CUDNN_DATA_UINT8", {"HIPDNN_DATA_UINT8", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 {"CUDNN_DATA_UINT8x4", {"HIPDNN_DATA_UINT8x4", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 {"CUDNN_ERRQUERY_BLOCKING", {"HIPDNN_ERRQUERY_BLOCKING", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"cudnnSeverity_t", {"hipdnnSeverity_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_SEV_FATAL", {"HIPDNN_SEV_FATAL", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_SEV_ERROR", {"HIPDNN_SEV_ERROR", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"CUDNN_SEV_WARNING", {"HIPDNN_SEV_WARNING", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 + {"CUDNN_SEV_INFO", {"HIPDNN_SEV_INFO", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 {"cudnnConvolutionFwdAlgo_t", {"hipdnnConvolutionFwdAlgo_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 @@ -2976,21 +2988,25 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 {"CUDNN_CONVOLUTION_FWD_ALGO_COUNT", {"HIPDNN_CONVOLUTION_FWD_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"cudnnConvolutionFwdPreference_t", {"hipdnnConvolutionFwdPreference_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnDeterminism_t", {"hipdnnDeterminism_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"CUDNN_NON_DETERMINISTIC", {"HIPDNN_NON_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 {"CUDNN_DETERMINISTIC", {"HIPDNN_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 + {"cudnnDivNormMode_t", {"hipdnnDivNormMode_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_DIVNORM_PRECOMPUTED_MEANS", {"HIPDNN_DIVNORM_PRECOMPUTED_MEANS", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"cudnnCTCLossAlgo_t", {"hipdnnCTCLossAlgo_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", {"HIPDNN_CTC_LOSS_ALGO_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 + {"CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", {"HIPDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 {"cudnnFilterDescriptor_t", {"hipdnnFilterDescriptor_t", CONV_TYPE, API_DNN}}, {"cudnnDropoutDescriptor_t", {"hipdnnDropoutDescriptor_t", CONV_TYPE, API_DNN}}, {"cudnnConvolutionFwdAlgoPerf_t", {"hipdnnConvolutionFwdAlgoPerf_t", CONV_TYPE, API_DNN}}, {"cudnnConvolutionBwdFilterAlgoPerf_t", {"hipdnnConvolutionBwdFilterAlgoPerf_t", CONV_TYPE, API_DNN}}, {"cudnnRNNDescriptor_t", {"hipdnnRNNDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnPersistentRNNPlan", {"hipdnnPersistentRNNPlan", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnPersistentRNNPlan_t", {"hipdnnPersistentRNNPlan_t", CONV_TYPE, API_DNN}}, {"cudnnTensorStruct", {"hipdnnTensorStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnConvolutionStruct", {"hipdnnConvolutionStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, @@ -3010,23 +3026,30 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnCTCLossDescriptor_t", {"hipdnnCTCLossDescriptor_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnCTCLossStruct", {"hipdnnCTCLossStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, {"cudnnConvolutionBwdDataAlgoPerf_t", {"hipdnnConvolutionBwdDataAlgoPerf_t", CONV_TYPE, API_DNN}}, + {"cudnnAlgorithmDescriptor_t", {"hipdnnAlgorithmDescriptor_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnAlgorithmStruct", {"hipdnnAlgorithmStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnAlgorithmPerformance_t", {"hipdnnAlgorithmPerformance_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnAlgorithmPerformanceStruct", {"hipdnnAlgorithmPerformanceStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnRNNStruct", {"hipdnnRNNStruct", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnRNNDescriptor_t", {"hipdnnRNNDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnAlgorithm_t", {"hipdnnAlgorithm_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCallback_t", {"hipdnnCallback_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDebug_t", {"hipdnnDebug_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnLRNMode_t", {"hipdnnLRNMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_LRN_CROSS_CHANNEL_DIM1", {"HIPDNN_LRN_CROSS_CHANNEL", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 vs 1 {"cudnnRNNInputMode_t", {"hipdnnRNNInputMode_t", CONV_TYPE, API_DNN}}, {"CUDNN_LINEAR_INPUT", {"HIPDNN_LINEAR_INPUT", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_SKIP_INPUT", {"HIPDNN_SKIP_INPUT", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnDirectionMode_t", {"hipdnnDirectionMode_t", CONV_TYPE, API_DNN}}, {"CUDNN_UNIDIRECTIONAL", {"HIPDNN_UNIDIRECTIONAL", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_BIDIRECTIONAL", {"HIPDNN_BIDIRECTIONAL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnMathType_t", {"hipdnnMathType_t", CONV_TYPE, API_DNN}}, {"CUDNN_DEFAULT_MATH", {"HIPDNN_DEFAULT_MATH", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_TENSOR_OP_MATH", {"HIPDNN_TENSOR_OP_MATH", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnNanPropagation_t", {"hipdnnNanPropagation_t", CONV_TYPE, API_DNN}}, {"CUDNN_NOT_PROPAGATE_NAN", {"HIPDNN_NOT_PROPAGATE_NAN", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_PROPAGATE_NAN", {"HIPDNN_PROPAGATE_NAN", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnConvolutionBwdDataAlgo_t", {"hipdnnConvolutionBwdDataAlgo_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 @@ -3035,7 +3058,6 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"cudnnConvolutionBwdFilterAlgo_t", {"hipdnnConvolutionBwdFilterAlgo_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 @@ -3045,24 +3067,20 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"cudnnConvolutionBwdFilterPreference_t", {"hipdnnConvolutionBwdFilterPreference_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",{"HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnRNNAlgo_t", {"hipdnnRNNAlgo_t", CONV_TYPE, API_DNN}}, {"CUDNN_RNN_ALGO_STANDARD", {"HIPDNN_RNN_ALGO_STANDARD", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_RNN_ALGO_PERSIST_STATIC", {"HIPDNN_RNN_ALGO_PERSIST_STATIC", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_RNN_ALGO_PERSIST_DYNAMIC", {"HIPDNN_RNN_ALGO_PERSIST_DYNAMIC", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 {"CUDNN_RNN_ALGO_COUNT", {"HIPDNN_RNN_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnRNNMode_t", {"hipdnnRNNMode_t", CONV_TYPE, API_DNN}}, {"CUDNN_RNN_RELU", {"HIPDNN_RNN_RELU", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_RNN_TANH", {"HIPDNN_RNN_TANH", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_LSTM", {"HIPDNN_LSTM", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 {"CUDNN_GRU", {"HIPDNN_GRU", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnOpTensorOp_t", {"hipdnnOpTensorOp_t", CONV_TYPE, API_DNN}}, {"CUDNN_OP_TENSOR_ADD", {"HIPDNN_OP_TENSOR_ADD", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_OP_TENSOR_MUL", {"HIPDNN_OP_TENSOR_MUL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 @@ -3070,7 +3088,6 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_OP_TENSOR_MAX", {"HIPDNN_OP_TENSOR_MAX", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 {"CUDNN_OP_TENSOR_SQRT", {"HIPDNN_OP_TENSOR_SQRT", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 {"CUDNN_OP_TENSOR_NOT", {"HIPDNN_OP_TENSOR_NOT", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"cudnnReduceTensorOp_t", {"hipdnnReduceTensorOp_t", CONV_TYPE, API_DNN}}, {"CUDNN_REDUCE_TENSOR_ADD", {"HIPDNN_REDUCE_TENSOR_ADD", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_REDUCE_TENSOR_MUL", {"HIPDNN_REDUCE_TENSOR_MUL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 @@ -3081,36 +3098,43 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUDNN_REDUCE_TENSOR_NORM1", {"HIPDNN_REDUCE_TENSOR_NORM1", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 {"CUDNN_REDUCE_TENSOR_NORM2", {"HIPDNN_REDUCE_TENSOR_NORM2", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 {"CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", {"HIPDNN_REDUCE_TENSOR_MUL_NO_ZEROS", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"cudnnReduceTensorIndices_t", {"hipdnnReduceTensorIndices_t", CONV_TYPE, API_DNN}}, {"CUDNN_REDUCE_TENSOR_NO_INDICES", {"HIPDNN_REDUCE_TENSOR_NO_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", {"HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnConvolutionBwdDataPreference_t", {"hipdnnConvolutionBwdDataPreference_t", CONV_TYPE, API_DNN}}, {"CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnIndicesType_t", {"hipdnnIndicesType_t", CONV_TYPE, API_DNN}}, {"CUDNN_32BIT_INDICES", {"HIPDNN_32BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_64BIT_INDICES", {"HIPDNN_64BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_16BIT_INDICES", {"HIPDNN_16BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 {"CUDNN_8BIT_INDICES", {"HIPDNN_8BIT_INDICES", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnSoftmaxAlgorithm_t", {"hipdnnSoftmaxAlgorithm_t", CONV_TYPE, API_DNN}}, {"CUDNN_SOFTMAX_FAST", {"HIPDNN_SOFTMAX_FAST", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_SOFTMAX_ACCURATE", {"HIPDNN_SOFTMAX_ACCURATE", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_SOFTMAX_LOG", {"HIPDNN_SOFTMAX_LOG", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnSoftmaxMode_t", {"hipdnnSoftmaxMode_t", CONV_TYPE, API_DNN}}, {"CUDNN_SOFTMAX_MODE_INSTANCE", {"HIPDNN_SOFTMAX_MODE_INSTANCE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_SOFTMAX_MODE_CHANNEL", {"HIPDNN_SOFTMAX_MODE_CHANNEL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnPoolingMode_t", {"hipdnnPoolingMode_t", CONV_TYPE, API_DNN}}, {"CUDNN_POOLING_MAX", {"HIPDNN_POOLING_MAX", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 {"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 {"CUDNN_POOLING_MAX_DETERMINISTIC", {"HIPDNN_POOLING_MAX_DETERMINISTIC", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"cudnnActivationMode_t", {"hipdnnActivationMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_ACTIVATION_SIGMOID", {"HIPDNN_ACTIVATION_SIGMOID", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_ACTIVATION_RELU", {"HIPDNN_ACTIVATION_RELU", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_ACTIVATION_TANH", {"HIPDNN_ACTIVATION_TANH", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_ACTIVATION_CLIPPED_RELU", {"HIPDNN_ACTIVATION_CLIPPED_RELU", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_ACTIVATION_ELU", {"HIPDNN_ACTIVATION_ELU", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_ACTIVATION_IDENTITY", {"HIPDNN_ACTIVATION_PATHTRU", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"cudnnBatchNormMode_t", {"hipdnnBatchNormMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_BATCHNORM_PER_ACTIVATION", {"HIPDNN_BATCHNORM_PER_ACTIVATION", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_BATCHNORM_SPATIAL", {"HIPDNN_BATCHNORM_SPATIAL", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_BATCHNORM_SPATIAL_PERSISTENT", {"HIPDNN_BATCHNORM_SPATIAL_PERSISTENT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"cudnnSamplerType_t", {"hipdnnSamplerType_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"CUDNN_SAMPLER_BILINEAR", {"HIPDNN_SAMPLER_BILINEAR", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 {"cudnnGetVersion", {"hipdnnGetVersion", CONV_VERSION, API_DNN}}, {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, @@ -3132,9 +3156,11 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnCreateRNNDescriptor", {"hipdnnCreateRNNDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetStream", {"hipdnnSetStream", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetStream", {"hipdnnGetStream", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetRNNDescriptor_v5", {"hipdnnSetRNNDescriptor_v5", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetRNNDescriptor_v6", {"hipdnnSetRNNDescriptor_v6", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetRNNDescriptor", {"hipdnnSetRNNDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnDropoutGetStatesSize", {"hipdnnDropoutGetStatesSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDropoutGetReserveSpaceSize", {"hipdnnDropoutGetReserveSpaceSize", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnTransformTensor", {"hipdnnTransformTensor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetTensor4dDescriptor", {"hipdnnSetTensor4dDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetTensor4dDescriptor", {"hipdnnGetTensor4dDescriptor", CONV_MATH_FUNC, API_DNN}}, @@ -3158,16 +3184,25 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnGetConvolutionBackwardDataAlgorithm", {"hipdnnGetConvolutionBackwardDataAlgorithm", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionBackwardDataAlgorithm_v7", {"hipdnnGetConvolutionBackwardDataAlgorithm_v7", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetConvolutionBackwardDataAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardDataAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionForwardAlgorithmMaxCount", {"hipdnnGetConvolutionForwardAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetConvolutionForwardAlgorithmMaxCount", {"hipdnnGetConvolutionForwardAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetRNNLinLayerMatrixParams", {"hipdnnGetRNNLinLayerMatrixParams", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetRNNLinLayerBiasParams", {"hipdnnGetRNNLinLayerBiasParams", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetRNNProjectionLayers", {"hipdnnSetRNNProjectionLayers", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetRNNProjectionLayers", {"hipdnnGetRNNProjectionLayers", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetRNNAlgorithmDescriptor", {"hipdnnSetRNNAlgorithmDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetRNNDescriptor", {"hipdnnGetRNNDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetFilterNdDescriptor", {"hipdnnGetFilterNdDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnFindConvolutionBackwardDataAlgorithm", {"hipdnnFindConvolutionBackwardDataAlgorithm", CONV_MATH_FUNC, API_DNN}}, {"cudnnFindConvolutionBackwardDataAlgorithmEx", {"hipdnnFindConvolutionBackwardDataAlgorithmEx", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetDropoutDescriptor", {"hipdnnSetDropoutDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnRestoreDropoutDescriptor", {"hipdnnRestoreDropoutDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetDropoutDescriptor", {"hipdnnGetDropoutDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetDropoutDescriptor", {"hipdnnGetDropoutDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolution2dDescriptor", {"hipdnnGetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetConvolutionMathType", {"hipdnnSetConvolutionMathType", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDropoutForward", {"hipdnnDropoutForward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDropoutBackward", {"hipdnnDropoutBackward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetConvolutionMathType", {"hipdnnGetConvolutionMathType", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnSetConvolutionGroupCount", {"hipdnnSetConvolutionGroupCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetConvolutionGroupCount", {"hipdnnGetConvolutionGroupCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, @@ -3188,6 +3223,15 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnRNNBackwardData", {"hipdnnRNNBackwardData", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetFilter4dDescriptor", {"hipdnnSetFilter4dDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetRNNMatrixMathType", {"hipdnnSetRNNMatrixMathType", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetRNNMatrixMathType", {"hipdnnGetRNNMatrixMathType", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetRNNForwardInferenceAlgorithmMaxCount", {"hipdnnGetRNNForwardInferenceAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFindRNNForwardInferenceAlgorithmEx", {"hipdnnFindRNNForwardInferenceAlgorithmEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetRNNForwardTrainingAlgorithmMaxCount", {"hipdnnGetRNNForwardTrainingAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFindRNNForwardTrainingAlgorithmEx", {"hipdnnFindRNNForwardTrainingAlgorithmEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetRNNBackwardDataAlgorithmMaxCount", {"hipdnnGetRNNBackwardDataAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFindRNNBackwardDataAlgorithmEx", {"hipdnnFindRNNBackwardDataAlgorithmEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetRNNBackwardWeightsAlgorithmMaxCount", {"hipdnnGetRNNBackwardWeightsAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnFindRNNBackwardWeightsAlgorithmEx", {"hipdnnFindRNNBackwardWeightsAlgorithmEx", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetConvolutionForwardAlgorithm_v7", {"hipdnnGetConvolutionForwardAlgorithm_v7", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, @@ -3209,10 +3253,8 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnDestroyReduceTensorDescriptor", {"hipdnnDestroyReduceTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyPersistentRNNPlan", {"hipdnnDestroyPersistentRNNPlan", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroy", {"hipdnnDestroy", CONV_MATH_FUNC, API_DNN}}, - {"cudnnSoftmaxForward", {"hipdnnSoftmaxForward", CONV_MATH_FUNC, API_DNN}}, {"cudnnSoftmaxBackward", {"hipdnnSoftmaxBackward", CONV_MATH_FUNC, API_DNN}}, - {"cudnnCreatePoolingDescriptor", {"hipdnnCreatePoolingDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnSetPooling2dDescriptor", {"hipdnnSetPooling2dDescriptor", CONV_MATH_FUNC, API_DNN}}, {"cudnnGetPooling2dDescriptor", {"hipdnnGetPooling2dDescriptor", CONV_MATH_FUNC, API_DNN}}, @@ -3221,10 +3263,53 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cudnnGetPoolingNdForwardOutputDim", {"hipdnnGetPoolingNdForwardOutputDim", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, {"cudnnGetPooling2dForwardOutputDim", {"hipdnnGetPooling2dForwardOutputDim", CONV_MATH_FUNC, API_DNN}}, {"cudnnDestroyPoolingDescriptor", {"hipdnnDestroyPoolingDescriptor", CONV_MATH_FUNC, API_DNN}}, - {"cudnnPoolingForward", {"hipdnnPoolingForward", CONV_MATH_FUNC, API_DNN}}, {"cudnnPoolingBackward", {"hipdnnPoolingBackward", CONV_MATH_FUNC, API_DNN}}, - + {"cudnnCreateActivationDescriptor", {"hipdnnCreateActivationDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetActivationDescriptor", {"hipdnnSetActivationDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetActivationDescriptor", {"hipdnnGetActivationDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyActivationDescriptor", {"hipdnnDestroyActivationDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnActivationForward", {"hipdnnActivationForward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnActivationBackward", {"hipdnnActivationBackward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateLRNDescriptor", {"hipdnnCreateLRNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetLRNDescriptor", {"hipdnnSetLRNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetLRNDescriptor", {"hipdnnGetLRNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyLRNDescriptor", {"hipdnnDestroyLRNDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnLRNCrossChannelForward", {"hipdnnLRNCrossChannelForward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnLRNCrossChannelBackward", {"hipdnnLRNCrossChannelBackward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDivisiveNormalizationForward", {"hipdnnDivisiveNormalizationForward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDivisiveNormalizationBackward", {"hipdnnDivisiveNormalizationBackward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDeriveBNTensorDescriptor", {"hipdnnDeriveBNTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnBatchNormalizationForwardTraining", {"hipdnnBatchNormalizationForwardTraining", CONV_MATH_FUNC, API_DNN}}, + {"cudnnBatchNormalizationForwardInference", {"hipdnnBatchNormalizationForwardInference", CONV_MATH_FUNC, API_DNN}}, + {"cudnnBatchNormalizationBackward", {"hipdnnBatchNormalizationBackward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateSpatialTransformerDescriptor", {"hipdnnCreateSpatialTransformerDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetSpatialTransformerNdDescriptor", {"hipdnnSetSpatialTransformerNdDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroySpatialTransformerDescriptor", {"hipdnnDestroySpatialTransformerDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSpatialTfGridGeneratorForward", {"hipdnnSpatialTfGridGeneratorForward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSpatialTfGridGeneratorBackward", {"hipdnnSpatialTfGridGeneratorBackward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSpatialTfSamplerForward", {"hipdnnSpatialTfSamplerForward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSpatialTfSamplerBackward", {"hipdnnSpatialTfSamplerBackward", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCreateCTCLossDescriptor", {"hipdnnCreateCTCLossDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetCTCLossDescriptor", {"hipdnnSetCTCLossDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetCTCLossDescriptor", {"hipdnnGetCTCLossDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroyCTCLossDescriptor", {"hipdnnDestroyCTCLossDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCTCLoss", {"hipdnnCTCLoss", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetCTCLossWorkspaceSize", {"hipdnnGetCTCLossWorkspaceSize", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCreateAlgorithmDescriptor", {"hipdnnCreateAlgorithmDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetAlgorithmDescriptor", {"hipdnnSetAlgorithmDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetAlgorithmDescriptor", {"hipdnnGetAlgorithmDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCopyAlgorithmDescriptor", {"hipdnnCopyAlgorithmDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroyAlgorithmDescriptor", {"hipdnnDestroyAlgorithmDescriptor", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnCreateAlgorithmPerformance", {"hipdnnCreateAlgorithmPerformance", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetAlgorithmPerformance", {"hipdnnSetAlgorithmPerformance", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetAlgorithmPerformance", {"hipdnnGetAlgorithmPerformance", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnDestroyAlgorithmPerformance", {"hipdnnDestroyAlgorithmPerformance", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetAlgorithmSpaceSize", {"hipdnnGetAlgorithmSpaceSize", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSaveAlgorithm", {"hipdnnSaveAlgorithm", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnRestoreAlgorithm", {"hipdnnRestoreAlgorithm", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnSetCallback", {"hipdnnSetCallback", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetCallback", {"hipdnnGetCallback", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}}, }; const std::map& CUDA_RENAMES_MAP() { From f47a8236d7a7c77a5476e6e0f7a54812cf686cfe Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 21 May 2018 22:14:45 +0530 Subject: [PATCH 26/49] Fixed memcpy2D for pinned memory case using 2D kernel --- src/hip_memory.cpp | 143 +++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 70 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index e2202e7860..3abdaec25c 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1208,7 +1208,6 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte return ihipLogStatus(e); } - hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes); @@ -1227,7 +1226,6 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) { return ihipLogStatus(e); } - hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, kind, stream); @@ -1235,7 +1233,6 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream)); } - hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, stream); @@ -1258,51 +1255,6 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream)); } -// TODO - review and optimize -hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind) { - if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; - - hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); - - hc::completion_future marker; - - hipError_t e = hipSuccess; - if((width == dpitch) && (width == spitch)) { - stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false); - } else { - try { - for (int i = 0; i < height; ++i) { - stream->locked_copySync((unsigned char*)dst + i * dpitch, - (unsigned char*)src + i * spitch, width, kind); - } - } catch (ihipException& ex) { - e = ex._code; - } - } - - return e; -} - -hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind) { - HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind); - hipError_t e = hipSuccess; - e = ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind); - return ihipLogStatus(e); -} - -hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { - HIP_INIT_SPECIAL_API((TRACE_MCMD), pCopy); - hipError_t e = hipSuccess; - if (pCopy == nullptr) { - e = hipErrorInvalidValue; - } - e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->widthInBytes, pCopy->srcHost, pCopy->srcPitch, - pCopy->widthInBytes, pCopy->height, hipMemcpyDefault); - return ihipLogStatus(e); -} - hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind); @@ -1534,16 +1486,15 @@ inline const T& clamp_integer(const T& x, const T& lower, const T& upper) { return std::min(upper, std::max(x, lower)); } -template -__global__ void hip_copy_n(T* dst, const T* src, size_t n) { - const uint32_t grid_dim = gridDim.x * blockDim.x; +template +__global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) { - size_t idx = blockIdx.x * block_dim + threadIdx.x; - while (idx < n) { - // __builtin_memcpy(reinterpret_cast(dst+idx), reinterpret_cast(src+idx), - // sizeof(T)); - dst[idx] = src[idx]; - idx += grid_dim; + size_t idx = blockIdx.x * blockDim.x + threadIdx.x; + size_t idy = blockIdx.y * blockDim.y + threadIdx.y; + if((idx < width) && (idy < height)){ + T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch); + T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); + dstPtr[idx] = srcPtr[idx]; } } } // namespace @@ -1559,13 +1510,12 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { } template -void ihipMemcpyKernel(hipStream_t stream, T* dst, const T* src, size_t sizeBytes) { - static constexpr uint32_t block_dim_ = 256; - - const uint32_t grid_dim = clamp_integer(sizeBytes / block_dim_, 1, UINT32_MAX); - - hipLaunchKernelGGL(hip_copy_n, dim3(grid_dim), dim3{block_dim_}, 0u, stream, dst, src, - sizeBytes); +void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) { + size_t threadsPerBlock = 16; + uint32_t grid_dim_x = clamp_integer( (width+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); + uint32_t grid_dim_y = clamp_integer( (height+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); + hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock,threadsPerBlock), 0u, stream, dst, src, + width, height, destPitch, srcPitch); } typedef enum ihipMemsetDataType { @@ -1643,6 +1593,50 @@ int isLockedPointer(const void *ptr) return isLocked; } +// TODO - review and optimize +hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; + + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + int isLocked = 0; + if(kind == hipMemcpyHostToDevice) { + isLocked = isLockedPointer(src); + } else if(kind == hipMemcpyDeviceToHost) { + isLocked = isLockedPointer(dst); + } + + hc::completion_future marker; + + hipError_t e = hipSuccess; + if((width == dpitch) && (width == spitch)) { + stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false); + } else { + try { + if(isLocked) { + for (int i = 0; i < height; ++i) + stream->locked_copySync((unsigned char*)dst + i * dpitch, + (unsigned char*)src + i * spitch, width, kind); + } else { + ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); + stream->locked_wait(); + } + } catch (ihipException& ex) { + e = ex._code; + } + } + + return e; +} + +hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind) { + HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind); + hipError_t e = hipSuccess; + e = ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind); + return ihipLogStatus(e); +} + hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); @@ -1658,14 +1652,12 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp hip_internal::memcpyAsync(dst, src, width*height, kind, stream); } else { try { - for (int i = 0; i < height; ++i) { - if(!isLocked) { + if(!isLocked){ + for (int i = 0; i < height; ++i) e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind, stream); - } else{ - size_t sizeBytes = width*height; - ihipMemcpyKernel (stream, static_cast (dst), static_cast (src), sizeBytes/sizeof(uint32_t)); - } + } else{ + ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); } } catch (ihipException& ex) { e = ex._code; @@ -1675,6 +1667,17 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp return ihipLogStatus(e); } +hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { + HIP_INIT_SPECIAL_API((TRACE_MCMD), pCopy); + hipError_t e = hipSuccess; + if (pCopy == nullptr) { + e = hipErrorInvalidValue; + } + e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->widthInBytes, pCopy->srcHost, pCopy->srcPitch, + pCopy->widthInBytes, pCopy->height, hipMemcpyDefault); + return ihipLogStatus(e); +} + // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, value, sizeBytes, stream); From 9a76d5b94c86258a12c81fe5e3db6e9908391968 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 23 May 2018 14:43:47 +0530 Subject: [PATCH 27/49] Optimize memcpy2D kernel use --- src/hip_memory.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index fc60dffe86..b76f7f0524 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1495,10 +1495,18 @@ __global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t idx = blockIdx.x * blockDim.x + threadIdx.x; size_t idy = blockIdx.y * blockDim.y + threadIdx.y; - if((idx < width) && (idy < height)){ + size_t floorWidth = (width/sizeof(T)); + if((idx < floorWidth)){ T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch); T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); dstPtr[idx] = srcPtr[idx]; + } else { + size_t bytesToCopy = width - floorWidth; + uint8_t *dstPtr = (uint8_t *) ((uint8_t*) dst + idy * destPitch); + uint8_t *srcPtr = (uint8_t *) ((uint8_t*) src + idy * srcPitch); + for(int i =0 ; i < bytesToCopy ; i++) { + dstPtr[idx+i]= srcPtr[idx+i]; + } } } } // namespace @@ -1516,7 +1524,7 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { template void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) { size_t threadsPerBlock = 16; - uint32_t grid_dim_x = clamp_integer( (width+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); + uint32_t grid_dim_x = clamp_integer( ((width/sizeof(T))+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); uint32_t grid_dim_y = clamp_integer( (height+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock,threadsPerBlock), 0u, stream, dst, src, width, height, destPitch, srcPitch); @@ -1622,7 +1630,7 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch stream->locked_copySync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind); } else { - ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); + ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); stream->locked_wait(); } } catch (ihipException& ex) { @@ -1661,7 +1669,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind, stream); } else{ - ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); + ihipMemcpy2dKernel (stream, static_cast (dst), static_cast (src), width, height, dpitch, spitch); } } catch (ihipException& ex) { e = ex._code; From dc179e0c33a6b5c7061d20c2151b22c001c0b279 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 24 May 2018 08:27:24 +0530 Subject: [PATCH 28/49] Correct remaining bytes in copy 2d kernel --- src/hip_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index b76f7f0524..a1bc1416a0 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1501,7 +1501,7 @@ __global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); dstPtr[idx] = srcPtr[idx]; } else { - size_t bytesToCopy = width - floorWidth; + size_t bytesToCopy = width - (floorWidth * sizeof(T)); uint8_t *dstPtr = (uint8_t *) ((uint8_t*) dst + idy * destPitch); uint8_t *srcPtr = (uint8_t *) ((uint8_t*) src + idy * srcPitch); for(int i =0 ; i < bytesToCopy ; i++) { From 981e56a68f897e7dbd5c8c768be29dd05006eaaf Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 24 May 2018 17:00:12 +0530 Subject: [PATCH 29/49] Fix memcpy2d kernel dims --- src/hip_memory.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index a1bc1416a0..2f0c793033 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1496,17 +1496,19 @@ __global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t idx = blockIdx.x * blockDim.x + threadIdx.x; size_t idy = blockIdx.y * blockDim.y + threadIdx.y; size_t floorWidth = (width/sizeof(T)); - if((idx < floorWidth)){ - T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch); - T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); - dstPtr[idx] = srcPtr[idx]; - } else { - size_t bytesToCopy = width - (floorWidth * sizeof(T)); - uint8_t *dstPtr = (uint8_t *) ((uint8_t*) dst + idy * destPitch); - uint8_t *srcPtr = (uint8_t *) ((uint8_t*) src + idy * srcPitch); - for(int i =0 ; i < bytesToCopy ; i++) { - dstPtr[idx+i]= srcPtr[idx+i]; - } + if((idx < width) && (idy < height)) { + if((idx < floorWidth)){ + T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch); + T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); + dstPtr[idx] = srcPtr[idx]; + } else { + size_t bytesToCopy = width - (floorWidth * sizeof(T)); + uint8_t *dstPtr = (uint8_t *) ((uint8_t*) dst + idy * destPitch); + uint8_t *srcPtr = (uint8_t *) ((uint8_t*) src + idy * srcPitch); + for(int i =0 ; i < bytesToCopy ; i++) { + dstPtr[idx+i]= srcPtr[idx+i]; + } + } } } } // namespace @@ -1524,7 +1526,7 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { template void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) { size_t threadsPerBlock = 16; - uint32_t grid_dim_x = clamp_integer( ((width/sizeof(T))+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); + uint32_t grid_dim_x = clamp_integer( (width+(threadsPerBlock*sizeof(T)-1)) / (threadsPerBlock*sizeof(T)), 1, UINT32_MAX); uint32_t grid_dim_y = clamp_integer( (height+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock,threadsPerBlock), 0u, stream, dst, src, width, height, destPitch, srcPitch); From 4ff059d641c796c9a9e2732745a53c86eb400b3a Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 24 May 2018 23:30:27 +0530 Subject: [PATCH 30/49] Clean up and fix remaining bytes copy --- src/hip_memory.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 2f0c793033..fa68d7cfb1 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1496,19 +1496,15 @@ __global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t idx = blockIdx.x * blockDim.x + threadIdx.x; size_t idy = blockIdx.y * blockDim.y + threadIdx.y; size_t floorWidth = (width/sizeof(T)); - if((idx < width) && (idy < height)) { - if((idx < floorWidth)){ - T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch); - T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); - dstPtr[idx] = srcPtr[idx]; - } else { - size_t bytesToCopy = width - (floorWidth * sizeof(T)); - uint8_t *dstPtr = (uint8_t *) ((uint8_t*) dst + idy * destPitch); - uint8_t *srcPtr = (uint8_t *) ((uint8_t*) src + idy * srcPitch); - for(int i =0 ; i < bytesToCopy ; i++) { - dstPtr[idx+i]= srcPtr[idx+i]; - } - } + T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch); + T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch); + if((idx < floorWidth) && (idy < height)){ + dstPtr[idx] = srcPtr[idx]; + } else if((idx < width) && (idy < height)){ + size_t bytesToCopy = width - (floorWidth * sizeof(T)); + dstPtr += floorWidth; + srcPtr += floorWidth; + __builtin_memcpy(reinterpret_cast(dstPtr), reinterpret_cast(srcPtr),bytesToCopy); } } } // namespace From d8cb47242bed6190b509a1e092721f148f9102e3 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 24 May 2018 23:51:52 +0530 Subject: [PATCH 31/49] Use 64x4 grid dims --- src/hip_memory.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index fa68d7cfb1..9b6758ddf4 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1521,10 +1521,11 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) { template void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) { - size_t threadsPerBlock = 16; - uint32_t grid_dim_x = clamp_integer( (width+(threadsPerBlock*sizeof(T)-1)) / (threadsPerBlock*sizeof(T)), 1, UINT32_MAX); - uint32_t grid_dim_y = clamp_integer( (height+(threadsPerBlock-1)) / threadsPerBlock, 1, UINT32_MAX); - hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock,threadsPerBlock), 0u, stream, dst, src, + size_t threadsPerBlock_x = 64; + size_t threadsPerBlock_y = 4; + uint32_t grid_dim_x = clamp_integer( (width+(threadsPerBlock_x*sizeof(T)-1)) / (threadsPerBlock_x*sizeof(T)), 1, UINT32_MAX); + uint32_t grid_dim_y = clamp_integer( (height+(threadsPerBlock_y-1)) / threadsPerBlock_y, 1, UINT32_MAX); + hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock_x,threadsPerBlock_y), 0u, stream, dst, src, width, height, destPitch, srcPitch); } From f9693a9210551e9f2dfbda58e09b100d0be79beb Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 25 May 2018 12:22:16 -0400 Subject: [PATCH 32/49] Add math function declaration for hip-clang --- include/hip/hcc_detail/hip_runtime.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index e1b334aec3..d682d21dbd 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -514,7 +514,9 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri * @} */ - +// +// hip-clang functions +// #elif defined(__clang__) && defined(__HIP__) #define HIP_KERNEL_NAME(...) __VA_ARGS__ @@ -628,6 +630,8 @@ __DEVICE__ void inline __assert_fail(const char * __assertion, } #pragma push_macro("__DEVICE__") +#include + #endif #endif // HIP_HCC_DETAIL_RUNTIME_H From b7801f1fe37ddc620cc6b569ad8e0838b99467f5 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Fri, 25 May 2018 16:18:46 -0400 Subject: [PATCH 33/49] Add more function declarations to hip-clang --- bin/hipcc | 2 +- include/hip/hcc_detail/hip_runtime.h | 30 ++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index b162de2dce..c1446d0f7d 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -90,7 +90,7 @@ if ($HIP_PLATFORM eq "clang") { } $ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm"; $HIPCC="$HIP_CLANG_PATH/clang++"; - $HIPCXXFLAGS .= " -I$HIP_PATH/include"; + $HIPCXXFLAGS .= "-std=c++11 -I$HIP_PATH/include"; $HIPLDFLAGS = "--hip-link --hip-device-lib-path=$DEVICE_LIB_PATH -L$HIP_PATH/lib -lhip_hcc"; } elsif ($HIP_PLATFORM eq "hcc") { diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index d682d21dbd..1e469f5d03 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -29,6 +29,20 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H +#if defined(__HCC__) +#define __HCC_OR_HIP_CLANG__ 1 +#define __HCC_ONLY__ 1 +#define __HIP_CLANG_ONLY__ 0 +#elif defined(__clang__) && defined(__HIP__) +#define __HCC_OR_HIP_CLANG__ 1 +#define __HCC_ONLY__ 0 +#define __HIP_CLANG_ONLY__ 1 +#else +#define __HCC_OR_HIP_CLANG__ 0 +#define __HCC_ONLY__ 0 +#define __HIP_CLANG_ONLY__ 0 +#endif + //--- // Top part of file can be compiled with any compiler @@ -41,15 +55,16 @@ THE SOFTWARE. #include #endif //__cplusplus -#if __HCC__ +#if __HCC_OR_HIP_CLANG__ // Define NVCC_COMPAT for CUDA compatibility #define NVCC_COMPAT #define CUDA_SUCCESS hipSuccess #include +#endif // __HCC_OR_HIP_CLANG__ - +#if __HCC__ // define HIP_ENABLE_PRINTF to enable printf #ifdef HIP_ENABLE_PRINTF #define HCC_ENABLE_ACCELERATOR_PRINTF 1 @@ -164,6 +179,10 @@ extern int HIP_TRACE_API; #define __HCC_C__ #endif +#endif // defined __HCC__ + +#if __HCC_OR_HIP_CLANG__ + // TODO - hipify-clang - change to use the function call. //#define warpSize hc::__wavesize() static constexpr int warpSize = 64; @@ -371,6 +390,10 @@ __device__ void __threadfence_system(void); * @} */ +#endif // __HCC_OR_CLANG__ + +#if defined __HCC__ + template < typename std::common_type::type f> @@ -628,6 +651,9 @@ __DEVICE__ void inline __assert_fail(const char * __assertion, // Ignore all the args for now. __device_trap(); } + +__DEVICE__ void __syncthreads(); + #pragma push_macro("__DEVICE__") #include From 4383d6c6debdb19563d02147517dae6215e35157 Mon Sep 17 00:00:00 2001 From: Jorghi12 Date: Sat, 26 May 2018 00:40:14 -0400 Subject: [PATCH 34/49] Adding double/long int signatures for abs Adding overloads for abs that are found in cuda's math_functions. --- src/math_functions.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/math_functions.cpp b/src/math_functions.cpp index dedc40f2ae..bd0116080d 100644 --- a/src/math_functions.cpp +++ b/src/math_functions.cpp @@ -59,6 +59,12 @@ __device__ int abs(int x) { __device__ long long abs(long long x) { return x >= 0 ? x : -x; } +__device__ double abs(double x) { + return x >= 0 ? x : -x; +} +__device__ long int abs(long int x) { + return x >= 0 ? x : -x; +} __device__ float fabsf(float x) { return hc::precise_math::fabsf(x); } __device__ float fdimf(float x, float y) { return hc::precise_math::fdimf(x, y); } __device__ float fdividef(float x, float y) { return x / y; } From 4f6904b5c7fa7150efbaf2859467b47808f1f3fc Mon Sep 17 00:00:00 2001 From: Jorghi12 Date: Sat, 26 May 2018 00:41:24 -0400 Subject: [PATCH 35/49] Adding double/long int signatures for abs Adding overloads for abs that are found in cuda's math_functions. --- include/hip/hcc_detail/math_functions.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index e717df07c1..e7312f32f2 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -58,6 +58,8 @@ __device__ float expf(float x); __device__ float expm1f(float x); __device__ int abs(int x); __device__ long long abs(long long x); +__device__ double abs(double x); +__device__ long int abs(long int x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); __device__ float fdividef(float x, float y); From ec2edb2c927e86944a0c5e077fa634357ef3d1d4 Mon Sep 17 00:00:00 2001 From: Jorghi12 Date: Sat, 26 May 2018 16:21:14 -0400 Subject: [PATCH 36/49] Update math_functions.cpp CUDA also has a function named labs. --- src/math_functions.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/math_functions.cpp b/src/math_functions.cpp index bd0116080d..982073275b 100644 --- a/src/math_functions.cpp +++ b/src/math_functions.cpp @@ -65,6 +65,9 @@ __device__ double abs(double x) { __device__ long int abs(long int x) { return x >= 0 ? x : -x; } +__device__ long long int labs(long long int x) { + return x >= 0 ? x : -x; +} __device__ float fabsf(float x) { return hc::precise_math::fabsf(x); } __device__ float fdimf(float x, float y) { return hc::precise_math::fdimf(x, y); } __device__ float fdividef(float x, float y) { return x / y; } From 6e2b9d054794540a83e74d33326a3eda7c039eca Mon Sep 17 00:00:00 2001 From: Jorghi12 Date: Sat, 26 May 2018 16:22:10 -0400 Subject: [PATCH 37/49] Update math_functions.h CUDA also has a function named labs. --- include/hip/hcc_detail/math_functions.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index e7312f32f2..ea3bd97571 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -60,6 +60,7 @@ __device__ int abs(int x); __device__ long long abs(long long x); __device__ double abs(double x); __device__ long int abs(long int x); +__device__ long long int labs(long long int x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); __device__ float fdividef(float x, float y); From 024f77ce6197cce3e041e7fbb0ee1e916436ff14 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Mon, 28 May 2018 15:02:06 +0530 Subject: [PATCH 38/49] Add 1d texture types for NVCC path --- include/hip/nvcc_detail/hip_runtime_api.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index bd3ffc1cc1..e00405cc71 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -155,6 +155,8 @@ typedef struct cudaArray* hipArray_const_t; typedef cudaTextureObject_t hipTextureObject_t; typedef cudaSurfaceObject_t hipSurfaceObject_t; +#define hipTextureType1D cudaTextureType1D +#define hipTextureType1DLayered cudaTextureType1DLayered #define hipTextureType2D cudaTextureType2D #define hipTextureType3D cudaTextureType3D #define hipDeviceMapHost cudaDeviceMapHost From a05ac35ab120eee2049fa17ea4e278875d36e2f5 Mon Sep 17 00:00:00 2001 From: lthakur Date: Tue, 29 May 2018 14:08:01 +0530 Subject: [PATCH 39/49] HIP test case for 1D texture fetch (#424) --- tests/src/texture/hipBindTexObj1D.cpp | 100 +++++++++++++++++++++ tests/src/texture/tex1Dfetch_linear.cpp | 112 ++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 tests/src/texture/hipBindTexObj1D.cpp create mode 100644 tests/src/texture/tex1Dfetch_linear.cpp diff --git a/tests/src/texture/hipBindTexObj1D.cpp b/tests/src/texture/hipBindTexObj1D.cpp new file mode 100644 index 0000000000..2bcf78b1ca --- /dev/null +++ b/tests/src/texture/hipBindTexObj1D.cpp @@ -0,0 +1,100 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "test_common.h" +#include +#include + +#define N 512 +using namespace std; + +texture tex; + +bool testResult = true; + +__global__ void kernel(int *out) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + out[x] = tex1Dfetch(tex, x); +} + +void runTest(void); + +int main(int argc, char **argv) { + runTest(); + + if (testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +void runTest() { + string out; + int *tex_buf; + int val[N], i, output[N]; + size_t size = 0; + + for (i = 0; i < N; i++) { + val[i] = i; + output[i] = 0; + } + hipChannelFormatDesc chan_desc = + hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindUnsigned); + + hipMalloc(&tex_buf, N * sizeof(int)); + + hipMemcpy(tex_buf, val, N * sizeof(int), hipMemcpyHostToDevice); + + tex.addressMode[0] = hipAddressModeWrap; + tex.filterMode = hipFilterModeLinear; + tex.normalized = true; + + hipBindTexture(&size, &tex, (void *)tex_buf, &chan_desc, N * sizeof(int)); + + dim3 dimBlock(64, 1, 1); + dim3 dimGrid(N / dimBlock.x, 1, 1); + + hipLaunchKernelGGL(kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, output); + + hipDeviceSynchronize(); + + hipMemcpy(output, tex_buf, N * sizeof(int), hipMemcpyDeviceToHost); + + for (i = 0; i < N; i++) { + if (output[i] != val[i]) { + testResult = false; + return; + } + } + hipUnbindTexture(&tex); + hipFree(tex_buf); +} diff --git a/tests/src/texture/tex1Dfetch_linear.cpp b/tests/src/texture/tex1Dfetch_linear.cpp new file mode 100644 index 0000000000..4f755873f7 --- /dev/null +++ b/tests/src/texture/tex1Dfetch_linear.cpp @@ -0,0 +1,112 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/*HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" +#include "test_common.h" +#include +#include + +#define N 512 +using namespace std; + +bool testResult = true; + +__global__ void tex1d_kernel(float *val, hipTextureObject_t obj) { + int k = blockIdx.x * blockDim.x + threadIdx.x; + val[k] = tex1Dfetch(obj, k); +} + +void runTest(void); + +int main(int argc, char **argv) { + runTest(); + + if (testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +void runTest() { + + // Allocating the required buffer on gpu device + float *tex_buf, *tex_buf_check; + float val[N], output[N]; + int i; + for (i = 0; i < N; i++) + val[i] = (i + 1) * (i + 1); + hipMalloc(&tex_buf, N * sizeof(float)); + + hipMalloc(&tex_buf_check, N * sizeof(float)); + + hipMemcpy(tex_buf, val, N * sizeof(float), hipMemcpyHostToDevice); + + hipMemset(tex_buf_check, 0, N * sizeof(float)); + hipResourceDesc res_lin; + + memset(&res_lin, 0, sizeof(res_lin)); + + res_lin.resType = hipResourceTypeLinear; + res_lin.res.linear.devPtr = tex_buf; + res_lin.res.linear.desc.f = hipChannelFormatKindFloat; + res_lin.res.linear.desc.x = 32; + res_lin.res.linear.sizeInBytes = N * sizeof(float); + + hipTextureDesc tex_desc; + memset(&tex_desc, 0, sizeof(tex_desc)); + tex_desc.readMode = hipReadModeElementType; + + // Creating texture object + + hipTextureObject_t tex_obj = 0; + + hipCreateTextureObject(&tex_obj, &res_lin, &tex_desc, NULL); + + dim3 dimBlock(64, 1, 1); + dim3 dimGrid(N / dimBlock.x, 1, 1); + + for (i = 0; i < N; i++) + output[i] = 0; + + hipLaunchKernelGGL(tex1d_kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, + tex_buf_check, tex_obj); + hipDeviceSynchronize(); + + hipMemcpy(output, tex_buf_check, N * sizeof(float), hipMemcpyDeviceToHost); + + for (i = 0; i < N; i++) + if (output[i] != val[i]) { + testResult = false; + } + + hipDestroyTextureObject(tex_obj); + hipFree(tex_buf); + hipFree(tex_buf_check); +} From ca156449e21379ffe26da667d6f3b71b33295c79 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 29 May 2018 16:03:37 +0530 Subject: [PATCH 40/49] Fix tex2D tests for result value --- tests/src/texture/hipTextureObj2D.cpp | 216 +++++++++++++------------- tests/src/texture/hipTextureRef2D.cpp | 186 +++++++++++----------- 2 files changed, 201 insertions(+), 201 deletions(-) diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp index 1bf51bc2cb..504b632612 100644 --- a/tests/src/texture/hipTextureObj2D.cpp +++ b/tests/src/texture/hipTextureObj2D.cpp @@ -1,108 +1,108 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * RUN: %t - * HIT_END - */ -#include -#include -#include - -#include -#include "test_common.h" - -bool testResult = true; - -__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width, - int height) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2D(textureObject, x, y); -} - -void runTest(int argc, char** argv); - -int main(int argc, char** argv) { - runTest(argc, argv); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -void runTest(int argc, char** argv) { - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray* hipArray; - hipMallocArray(&hipArray, &channelDesc, width, height); - - hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); - - struct hipResourceDesc resDesc; - memset(&resDesc, 0, sizeof(resDesc)); - resDesc.resType = hipResourceTypeArray; - resDesc.res.array.array = hipArray; - - // Specify texture object parameters - struct hipTextureDesc texDesc; - memset(&texDesc, 0, sizeof(texDesc)); - texDesc.addressMode[0] = hipAddressModeWrap; - texDesc.addressMode[1] = hipAddressModeWrap; - texDesc.filterMode = hipFilterModePoint; - texDesc.readMode = hipReadModeElementType; - texDesc.normalizedCoords = 0; - - // Create texture object - hipTextureObject_t textureObject = 0; - hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); - - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, - width, height); - - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = false; - break; - } - } - } - hipDestroyTextureObject(textureObject); - hipFree(dData); - hipFreeArray(hipArray); -} +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ +#include +#include +#include + +#include +#include "test_common.h" + +__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width, + int height) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + outputData[y * width + x] = tex2D(textureObject, x, y); +} + +int runTest(int argc, char** argv); + +int main(int argc, char** argv) { + int testResult = runTest(argc, argv); + + if (testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +void runTest(int argc, char** argv) { + int testResult = 1; + unsigned int width = 256; + unsigned int height = 256; + unsigned int size = width * height * sizeof(float); + float* hData = (float*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + hData[i * width + j] = i * width + j; + } + } + printf("hData: "); + for (int i = 0; i < 64; i++) { + printf("%f ", hData[i]); + } + printf("\n"); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); + hipArray* hipArray; + hipMallocArray(&hipArray, &channelDesc, width, height); + + hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); + + struct hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Specify texture object parameters + struct hipTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.addressMode[0] = hipAddressModeWrap; + texDesc.addressMode[1] = hipAddressModeWrap; + texDesc.filterMode = hipFilterModePoint; + texDesc.readMode = hipReadModeElementType; + texDesc.normalizedCoords = 0; + + // Create texture object + hipTextureObject_t textureObject = 0; + hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL); + + float* dData = NULL; + hipMalloc((void**)&dData, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); + + hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, + width, height); + + hipDeviceSynchronize(); + + float* hOutputData = (float*)malloc(size); + memset(hOutputData, 0, size); + hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); + + printf("dData: "); + for (int i = 0; i < 64; i++) { + printf("%f ", hOutputData[i]); + } + printf("\n"); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + if (hData[i * width + j] != hOutputData[i * width + j]) { + printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], + hOutputData[i * width + j]); + testResult = 0; + break; + } + } + } + hipDestroyTextureObject(textureObject); + hipFree(dData); + hipFreeArray(hipArray); + return testResult; +} diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index c4c0b9e2fe..b912f789a7 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -1,93 +1,93 @@ -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * RUN: %t - * HIT_END - */ -#include -#include -#include - -#include -#include "test_common.h" -texture tex; - -bool testResult = true; - -__global__ void tex2DKernel(float* outputData, - int width, int height) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - outputData[y * width + x] = tex2D(tex, x, y); -} - -void runTest(int argc, char** argv); - -int main(int argc, char** argv) { - runTest(argc, argv); - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -void runTest(int argc, char** argv) { - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - printf("hData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hData[i]); - } - printf("\n"); - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); - hipArray* hipArray; - hipMallocArray(&hipArray, &channelDesc, width, height); - - hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); - - tex.addressMode[0] = hipAddressModeWrap; - tex.addressMode[1] = hipAddressModeWrap; - tex.filterMode = hipFilterModePoint; - tex.normalized = 0; - - hipBindTextureToArray(tex, hipArray, channelDesc); - - float* dData = NULL; - hipMalloc((void**)&dData, size); - - dim3 dimBlock(16, 16, 1); - dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); - hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, width, height); - hipDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); - - printf("dData: "); - for (int i = 0; i < 64; i++) { - printf("%f ", hOutputData[i]); - } - printf("\n"); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = false; - break; - } - } - } - hipFree(dData); - hipFreeArray(hipArray); -} +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ +#include +#include +#include + +#include +#include "test_common.h" +texture tex; + +__global__ void tex2DKernel(float* outputData, + int width, int height) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + outputData[y * width + x] = tex2D(tex, x, y); +} + +int runTest(int argc, char** argv); + +int main(int argc, char** argv) { + int testResult = runTest(argc, argv); + if (testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +int runTest(int argc, char** argv) { + int testResult = 1; + unsigned int width = 256; + unsigned int height = 256; + unsigned int size = width * height * sizeof(float); + float* hData = (float*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + hData[i * width + j] = i * width + j; + } + } + printf("hData: "); + for (int i = 0; i < 64; i++) { + printf("%f ", hData[i]); + } + printf("\n"); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); + hipArray* hipArray; + hipMallocArray(&hipArray, &channelDesc, width, height); + + hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice); + + tex.addressMode[0] = hipAddressModeWrap; + tex.addressMode[1] = hipAddressModeWrap; + tex.filterMode = hipFilterModePoint; + tex.normalized = 0; + + hipBindTextureToArray(tex, hipArray, channelDesc); + + float* dData = NULL; + hipMalloc((void**)&dData, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1); + hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, width, height); + hipDeviceSynchronize(); + + float* hOutputData = (float*)malloc(size); + memset(hOutputData, 0, size); + hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost); + + printf("dData: "); + for (int i = 0; i < 64; i++) { + printf("%f ", hOutputData[i]); + } + printf("\n"); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + if (hData[i * width + j] != hOutputData[i * width + j]) { + printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], + hOutputData[i * width + j]); + testResult = 0; + break; + } + } + } + hipFree(dData); + hipFreeArray(hipArray); + return testResult; +} From 7f7a7041a5b70b430023bab31ca42b0eeb5da5c7 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 29 May 2018 16:18:14 +0530 Subject: [PATCH 41/49] Fix runTest return type --- tests/src/texture/hipTextureObj2D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp index 504b632612..e214295989 100644 --- a/tests/src/texture/hipTextureObj2D.cpp +++ b/tests/src/texture/hipTextureObj2D.cpp @@ -29,7 +29,7 @@ int main(int argc, char** argv) { } } -void runTest(int argc, char** argv) { +int runTest(int argc, char** argv) { int testResult = 1; unsigned int width = 256; unsigned int height = 256; From d67b614b22a3bb2d70ba67e76a48d918519b49bb Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 30 May 2018 10:59:07 +0530 Subject: [PATCH 42/49] Fix hipBindTexture on NVCC path --- include/hip/nvcc_detail/hip_runtime_api.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index e00405cc71..d54a8de99c 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -1163,8 +1163,8 @@ inline static hipError_t hipBindTexture(size_t* offset, const struct texture -inline static hipError_t hipBindTexture(size_t* offset, struct texture* tex, - const void* devPtr, const struct hipChannelFormatDesc* desc, +inline static hipError_t hipBindTexture(size_t* offset, struct texture& tex, + const void* devPtr, const struct hipChannelFormatDesc& desc, size_t size = UINT_MAX) { return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); } @@ -1174,6 +1174,11 @@ inline static hipError_t hipUnbindTexture(struct texture* tex) return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); } +inline static hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr, + const hipChannelFormatDesc* desc, size_t size = UINT_MAX){ + return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); +} + template inline static hipError_t hipBindTextureToArray(struct texture& tex, hipArray_const_t array, From 7fd92d471bf786d1b68c7032b06cf623f54f99c8 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 30 May 2018 11:24:48 +0530 Subject: [PATCH 43/49] Add signature for hipFuncGetAttributes to nvcc_details Change-Id: I06d5b50028fcfe3ea9d81f749b02be6b094c0977 --- include/hip/nvcc_detail/hip_runtime_api.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index d54a8de99c..fbff263295 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -150,6 +150,7 @@ typedef CUfunction hipFunction_t; typedef CUdeviceptr hipDeviceptr_t; typedef struct cudaArray hipArray; typedef struct cudaArray* hipArray_const_t; +typedef cudaFuncAttributes hipFuncAttributes; #define hipMemcpy3DParms cudaMemcpy3DParms #define hipArrayDefault cudaArrayDefault @@ -1107,6 +1108,10 @@ inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule return hipCUResultTohipError(cuModuleGetFunction(function, module, kname)); } +inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { + return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func)); +} + inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, const char* name) { return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name)); From 8b93cd67a6208f45cac5293435877efd0bef49c4 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 30 May 2018 11:26:11 +0530 Subject: [PATCH 44/49] [dtests] Fix nvcc path build errors in hipFuncGetAttributes.tst Change-Id: I000bed69162897f7b6edd733ed6e9acc93beb0ed --- tests/src/runtimeApi/module/hipFuncGetAttributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp index 7d3eff5f73..6af87edb25 100644 --- a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp +++ b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 * RUN: %t * HIT_END */ From e8f3381997a556629d0c55b8f69e28b623fc732f Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 30 May 2018 11:42:01 +0530 Subject: [PATCH 45/49] Fixed tex1dFetch test --- tests/src/texture/hipBindTexObj1D.cpp | 100 --------------------- tests/src/texture/hipBindTexRef1DFetch.cpp | 95 ++++++++++++++++++++ 2 files changed, 95 insertions(+), 100 deletions(-) delete mode 100644 tests/src/texture/hipBindTexObj1D.cpp create mode 100644 tests/src/texture/hipBindTexRef1DFetch.cpp diff --git a/tests/src/texture/hipBindTexObj1D.cpp b/tests/src/texture/hipBindTexObj1D.cpp deleted file mode 100644 index 2bcf78b1ca..0000000000 --- a/tests/src/texture/hipBindTexObj1D.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -/* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc - * RUN: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "test_common.h" -#include -#include - -#define N 512 -using namespace std; - -texture tex; - -bool testResult = true; - -__global__ void kernel(int *out) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - out[x] = tex1Dfetch(tex, x); -} - -void runTest(void); - -int main(int argc, char **argv) { - runTest(); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -void runTest() { - string out; - int *tex_buf; - int val[N], i, output[N]; - size_t size = 0; - - for (i = 0; i < N; i++) { - val[i] = i; - output[i] = 0; - } - hipChannelFormatDesc chan_desc = - hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindUnsigned); - - hipMalloc(&tex_buf, N * sizeof(int)); - - hipMemcpy(tex_buf, val, N * sizeof(int), hipMemcpyHostToDevice); - - tex.addressMode[0] = hipAddressModeWrap; - tex.filterMode = hipFilterModeLinear; - tex.normalized = true; - - hipBindTexture(&size, &tex, (void *)tex_buf, &chan_desc, N * sizeof(int)); - - dim3 dimBlock(64, 1, 1); - dim3 dimGrid(N / dimBlock.x, 1, 1); - - hipLaunchKernelGGL(kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, output); - - hipDeviceSynchronize(); - - hipMemcpy(output, tex_buf, N * sizeof(int), hipMemcpyDeviceToHost); - - for (i = 0; i < N; i++) { - if (output[i] != val[i]) { - testResult = false; - return; - } - } - hipUnbindTexture(&tex); - hipFree(tex_buf); -} diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp new file mode 100644 index 0000000000..5d9a1b103b --- /dev/null +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -0,0 +1,95 @@ +/* +Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +#define N 512 + +texture tex; + +__global__ void kernel(float *out) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + if(x Date: Mon, 28 May 2018 10:46:23 -0400 Subject: [PATCH 46/49] Drop --amdgpu-target= options for hip-clang They are replaced by --cuda-gpu-arch= options elsewhere --- bin/hipcc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/hipcc b/bin/hipcc index c1446d0f7d..7fafc30053 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -303,6 +303,10 @@ foreach $arg (@ARGV) $default_amdgpu_target = 0; } + # hip-clang does not accept --amdgpu-target= options. + if (($arg =~ /--amdgpu-target=/) and $HIP_PLATFORM eq 'clang' ) { + $swallowArg = 1; + } if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0)) { From 6b8218e5a0ae78d29720ed6f92ba7cfcdec58fe2 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 30 May 2018 06:38:41 -0400 Subject: [PATCH 47/49] Add more function declarations for hip-clang --- include/hip/hcc_detail/hip_runtime.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 1e469f5d03..9737ecb19f 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -390,7 +390,7 @@ __device__ void __threadfence_system(void); * @} */ -#endif // __HCC_OR_CLANG__ +#endif // __HCC_OR_HIP_CLANG__ #if defined __HCC__ @@ -437,6 +437,8 @@ static constexpr Coordinates threadIdx; #define hipGridDim_y (hc_get_num_groups(1)) #define hipGridDim_z (hc_get_num_groups(2)) +#endif // defined __HCC__ +#if __HCC_OR_HIP_CLANG__ extern "C" __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size); extern "C" __device__ void* __hip_hc_memset(void* ptr, uint8_t val, size_t size); extern "C" __device__ void* __hip_hc_malloc(size_t); @@ -472,7 +474,9 @@ static inline __device__ void printf(const char* format, All... all) {} #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) +#endif //__HCC_OR_HIP_CLANG__ +#ifdef __HCC__ #define HIP_KERNEL_NAME(...) (__VA_ARGS__) #define HIP_SYMBOL(X) #X @@ -652,8 +656,6 @@ __DEVICE__ void inline __assert_fail(const char * __assertion, __device_trap(); } -__DEVICE__ void __syncthreads(); - #pragma push_macro("__DEVICE__") #include From 5ae6d93526f600d8b6cfb627312136e10eabb57d Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 30 May 2018 21:54:51 +0530 Subject: [PATCH 48/49] Fixed texture obj 1Dfetch test --- tests/src/texture/hipTextureObj1DFetch.cpp | 99 ++++++++++++++++++ tests/src/texture/tex1Dfetch_linear.cpp | 112 --------------------- 2 files changed, 99 insertions(+), 112 deletions(-) create mode 100644 tests/src/texture/hipTextureObj1DFetch.cpp delete mode 100644 tests/src/texture/tex1Dfetch_linear.cpp diff --git a/tests/src/texture/hipTextureObj1DFetch.cpp b/tests/src/texture/hipTextureObj1DFetch.cpp new file mode 100644 index 0000000000..faf6541ce1 --- /dev/null +++ b/tests/src/texture/hipTextureObj1DFetch.cpp @@ -0,0 +1,99 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/*HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +#define N 512 + +__global__ void tex1dKernel(float *val, hipTextureObject_t obj) { + int k = blockIdx.x * blockDim.x + threadIdx.x; + if (k < N) + val[k] = tex1Dfetch(obj, k); +} + +int runTest(void); + +int main(int argc, char **argv) { + int testResult = runTest(); + if(testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +int runTest() { + int testResult = 1; + // Allocating the required buffer on gpu device + float *texBuf, *texBufOut; + float val[N], output[N]; + for (int i = 0; i < N; i++) { + val[i] = (i + 1) * (i + 1); + output[i] = 0.0; + } + HIPCHECK(hipMalloc(&texBuf, N * sizeof(float))); + HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float))); + HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice)); + HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float))); + hipResourceDesc resDescLinear; + + memset(&resDescLinear, 0, sizeof(resDescLinear)); + resDescLinear.resType = hipResourceTypeLinear; + resDescLinear.res.linear.devPtr = texBuf; + resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat); + resDescLinear.res.linear.sizeInBytes = N * sizeof(float); + + hipTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.readMode = hipReadModeElementType; + + // Creating texture object + hipTextureObject_t texObj = 0; + HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL)); + + dim3 dimBlock(64, 1, 1); + dim3 dimGrid(N / dimBlock.x, 1, 1); + + hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, + texBufOut, texObj); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost)); + + for(int i = 0; i < N; i++) + if (output[i] != val[i]) { + testResult = 0; + break; + } + + HIPCHECK(hipDestroyTextureObject(texObj)); + HIPCHECK(hipFree(texBuf)); + HIPCHECK(hipFree(texBufOut)); + return testResult; +} diff --git a/tests/src/texture/tex1Dfetch_linear.cpp b/tests/src/texture/tex1Dfetch_linear.cpp deleted file mode 100644 index 4f755873f7..0000000000 --- a/tests/src/texture/tex1Dfetch_linear.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/*HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc - * RUN: %t - * HIT_END - */ - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include "test_common.h" -#include -#include - -#define N 512 -using namespace std; - -bool testResult = true; - -__global__ void tex1d_kernel(float *val, hipTextureObject_t obj) { - int k = blockIdx.x * blockDim.x + threadIdx.x; - val[k] = tex1Dfetch(obj, k); -} - -void runTest(void); - -int main(int argc, char **argv) { - runTest(); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -void runTest() { - - // Allocating the required buffer on gpu device - float *tex_buf, *tex_buf_check; - float val[N], output[N]; - int i; - for (i = 0; i < N; i++) - val[i] = (i + 1) * (i + 1); - hipMalloc(&tex_buf, N * sizeof(float)); - - hipMalloc(&tex_buf_check, N * sizeof(float)); - - hipMemcpy(tex_buf, val, N * sizeof(float), hipMemcpyHostToDevice); - - hipMemset(tex_buf_check, 0, N * sizeof(float)); - hipResourceDesc res_lin; - - memset(&res_lin, 0, sizeof(res_lin)); - - res_lin.resType = hipResourceTypeLinear; - res_lin.res.linear.devPtr = tex_buf; - res_lin.res.linear.desc.f = hipChannelFormatKindFloat; - res_lin.res.linear.desc.x = 32; - res_lin.res.linear.sizeInBytes = N * sizeof(float); - - hipTextureDesc tex_desc; - memset(&tex_desc, 0, sizeof(tex_desc)); - tex_desc.readMode = hipReadModeElementType; - - // Creating texture object - - hipTextureObject_t tex_obj = 0; - - hipCreateTextureObject(&tex_obj, &res_lin, &tex_desc, NULL); - - dim3 dimBlock(64, 1, 1); - dim3 dimGrid(N / dimBlock.x, 1, 1); - - for (i = 0; i < N; i++) - output[i] = 0; - - hipLaunchKernelGGL(tex1d_kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, - tex_buf_check, tex_obj); - hipDeviceSynchronize(); - - hipMemcpy(output, tex_buf_check, N * sizeof(float), hipMemcpyDeviceToHost); - - for (i = 0; i < N; i++) - if (output[i] != val[i]) { - testResult = false; - } - - hipDestroyTextureObject(tex_obj); - hipFree(tex_buf); - hipFree(tex_buf_check); -} From b7641cf8353fb4b510c66bf7f1edae3f946a9a9f Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Wed, 30 May 2018 16:33:18 -0400 Subject: [PATCH 49/49] Fix __syncthreads for hip-clang --- include/hip/hcc_detail/hip_runtime.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 9737ecb19f..1a6b0f7dda 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -471,12 +471,12 @@ static inline __device__ void printf(const char* format, All... all) {} #endif #endif - - -#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) #endif //__HCC_OR_HIP_CLANG__ #ifdef __HCC__ + +#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) + #define HIP_KERNEL_NAME(...) (__VA_ARGS__) #define HIP_SYMBOL(X) #X @@ -656,6 +656,8 @@ __DEVICE__ void inline __assert_fail(const char * __assertion, __device_trap(); } +extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); + #pragma push_macro("__DEVICE__") #include