From 097fa4f2008bd0cfdaf7ed08fdb73e8ca83efbba Mon Sep 17 00:00:00 2001 From: emankov Date: Wed, 2 May 2018 22:11:05 +0300 Subject: [PATCH] [HIPIFY] Initial cuDNN support - based on https://github.com/ROCmSoftwarePlatform/hipDNN. - lit testing was supplemented with CUDA_DNN_ROOT_DIR for cudnn testing. - single cuDNN test was added. [ROCm/hip commit: 2569972dde51db2c383604661625243dcaba1a26] --- projects/hip/hipify-clang/src/CUDA2HipMap.cpp | 80 ++++++ projects/hip/hipify-clang/src/Statistics.h | 1 + .../cuDNN/cudnn_convolution_forward.cu | 254 ++++++++++++++++++ projects/hip/tests/hipify-clang/lit.cfg | 4 +- .../hip/tests/hipify-clang/lit.site.cfg.in | 1 + 5 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 projects/hip/tests/hipify-clang/cuDNN/cudnn_convolution_forward.cu diff --git a/projects/hip/hipify-clang/src/CUDA2HipMap.cpp b/projects/hip/hipify-clang/src/CUDA2HipMap.cpp index 47358802e9..cb96f7b225 100644 --- a/projects/hip/hipify-clang/src/CUDA2HipMap.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HipMap.cpp @@ -403,6 +403,9 @@ const std::map CUDA_INCLUDE_MAP{ {"curand_precalc.h", {"hiprand_kernel.h", CONV_INCLUDE, API_RAND}}, {"curand_uniform.h", {"hiprand_kernel.h", CONV_INCLUDE, API_RAND}}, + // CUDNN includes + {"cudnn.h", {"hipDNN.h", CONV_INCLUDE_CUDA_MAIN_H, API_RAND}}, + // HIP includes // TODO: uncomment this when hip/cudacommon.h will be renamed to hip/hipcommon.h // {"cudacommon.h", {"hipcommon.h", CONV_INCLUDE, API_RUNTIME}}, @@ -2880,6 +2883,83 @@ const std::map CUDA_IDENTIFIER_MAP{ {"curand_poisson4", {"hiprand_poisson4", CONV_DEVICE_FUNC, API_RAND}}, {"curand_Philox4x32_10", {"hiprand_Philox4x32_10", CONV_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, // unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence + + ///////////////////////////// cuDNN ///////////////////////////// + {"cudnnContext", {"hipdnnContext", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnHandle_t", {"hipdnnHandle_t", CONV_TYPE, API_DNN}}, + {"cudnnStatus_t", {"hipdnnStatus_t", CONV_TYPE, API_DNN}}, + {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 + {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 + {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 + {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", CONV_NUMERIC_LITERAL, API_DNN}}, // 9 + {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", CONV_NUMERIC_LITERAL, API_DNN}}, // 10 + {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", CONV_NUMERIC_LITERAL, API_DNN}}, // 11 + {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 + {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 + {"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnTensorDescriptor_t", {"hipdnnTensorDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnConvolutionDescriptor_t", {"hipdnnConvolutionDescriptor_t", CONV_TYPE, API_DNN}}, + {"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CROSS_CORRELATION", {"HIPDNN_CROSS_CORRELATION", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"cudnnTensorFormat_t", {"hipdnnTensorFormat_t", CONV_TYPE, API_DNN}}, + {"CUDNN_TENSOR_NCHW", {"HIPDNN_TENSOR_NCHW", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_TENSOR_NHWC", {"HIPDNN_TENSOR_NHWC", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_TENSOR_NCHW_VECT_C", {"HIPDNN_TENSOR_NCHW_VECT_C", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"cudnnDataType_t", {"hipdnnDataType_t", CONV_TYPE, API_DNN}}, + {"CUDNN_DATA_FLOAT", {"HIPDNN_DATA_FLOAT", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_DATA_DOUBLE", {"HIPDNN_DATA_DOUBLE", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_DATA_HALF", {"HIPDNN_DATA_HALF", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_DATA_INT8", {"HIPDNN_DATA_INT8", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_DATA_INT32", {"HIPDNN_DATA_INT32", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_DATA_INT8x4", {"HIPDNN_DATA_INT8x4", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"CUDNN_DATA_UINT8", {"HIPDNN_DATA_UINT8", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 + {"CUDNN_DATA_UINT8x4", {"HIPDNN_DATA_UINT8x4", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 + + {"cudnnConvolutionFwdAlgo_t", {"hipdnnConvolutionFwdAlgo_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_CONVOLUTION_FWD_ALGO_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + {"CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", {"HIPDNN_CONVOLUTION_FWD_ALGO_DIRECT", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 + {"CUDNN_CONVOLUTION_FWD_ALGO_FFT", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 + {"CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 + {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 + {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 + {"CUDNN_CONVOLUTION_FWD_ALGO_COUNT", {"HIPDNN_CONVOLUTION_FWD_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 + + {"cudnnConvolutionFwdPreference_t", {"hipdnnConvolutionFwdPreference_t", CONV_TYPE, API_DNN}}, + {"CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 + {"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 + {"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 + + {"cudnnFilterDescriptor_t", {"hipdnnFilterDescriptor_t", CONV_TYPE, API_DNN}}, + + {"cudnnGetVersion", {"hipdnnGetVersion", CONV_VERSION, API_DNN}}, + {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}}, + {"cudnnGetErrorString", {"hipdnnGetErrorString", CONV_ERROR, API_DNN}}, + {"cudnnCreate", {"hipdnnCreate", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateTensorDescriptor", {"hipdnnCreateTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetTensor4dDescriptor", {"hipdnnSetTensor4dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateFilterDescriptor", {"hipdnnCreateFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnSetFilter4dDescriptor", {"hipdnnSetFilter4dDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}}, + {"cudnnConvolutionForward", {"hipdnnConvolutionForward", CONV_MATH_FUNC, API_DNN}}, + {"cudnnGetConvolutionForwardWorkspaceSize", {"hipdnnGetConvolutionForwardWorkspaceSize", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyTensorDescriptor", {"hipdnnDestroyTensorDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyConvolutionDescriptor", {"hipdnnDestroyConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", CONV_MATH_FUNC, API_DNN}}, + {"cudnnDestroy", {"hipdnnDestroy", CONV_MATH_FUNC, API_DNN}}, + }; const std::map& CUDA_RENAMES_MAP() { diff --git a/projects/hip/hipify-clang/src/Statistics.h b/projects/hip/hipify-clang/src/Statistics.h index 81be7b09a8..77bf9b0518 100644 --- a/projects/hip/hipify-clang/src/Statistics.h +++ b/projects/hip/hipify-clang/src/Statistics.h @@ -56,6 +56,7 @@ enum ApiTypes { API_RUNTIME, API_BLAS, API_RAND, + API_DNN, API_LAST }; constexpr int NUM_API_TYPES = (int) ApiTypes::API_LAST; diff --git a/projects/hip/tests/hipify-clang/cuDNN/cudnn_convolution_forward.cu b/projects/hip/tests/hipify-clang/cuDNN/cudnn_convolution_forward.cu new file mode 100644 index 0000000000..e58116e22a --- /dev/null +++ b/projects/hip/tests/hipify-clang/cuDNN/cudnn_convolution_forward.cu @@ -0,0 +1,254 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +#include +#include +#include +#include + +// CHECK: #include +#include +// CHECK: #include "hipDNN.h" +#include "cudnn.h" + +// CHECK: hipError_t err = (f); \ +// CHECK: if (err != hipSuccess) { \ + +#define CUDA_CALL(f) { \ + cudaError_t err = (f); \ + if (err != cudaSuccess) { \ + std::cout \ + << " Error occurred: " << err << std::endl; \ + std::exit(1); \ + } \ +} +// CHECK: hipdnnStatus_t err = (f); \ +// CHECK: if (err != HIPDNN_STATUS_SUCCESS) { \ + +#define CUDNN_CALL(f) { \ + cudnnStatus_t err = (f); \ + if (err != CUDNN_STATUS_SUCCESS) { \ + std::cout \ + << " Error occurred: " << err << std::endl; \ + std::exit(1); \ + } \ +} + +__global__ void dev_const(float *px, float k) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + px[tid] = k; +} + +__global__ void dev_iota(float *px) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + px[tid] = tid; +} + +void print(const float *data, int n, int c, int h, int w) { + std::vector buffer(1 << 20); + // CHECK: CUDA_CALL(hipMemcpy( + CUDA_CALL(cudaMemcpy( + buffer.data(), data, + n * c * h * w * sizeof(float), + // CHECK: hipMemcpyDeviceToHost)); + cudaMemcpyDeviceToHost)); + int a = 0; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < c; ++j) { + std::cout << "n=" << i << ", c=" << j << ":" << std::endl; + for (int k = 0; k < h; ++k) { + for (int l = 0; l < w; ++l) { + std::cout << std::setw(4) << std::right << buffer[a]; + ++a; + } + std::cout << std::endl; + } + } + } + std::cout << std::endl; +} + +int main() { + // CHECK: hipdnnHandle_t cudnn; + cudnnHandle_t cudnn; + // CHECK: CUDNN_CALL(hipdnnCreate(&cudnn)); + CUDNN_CALL(cudnnCreate(&cudnn)); + + // input + const int in_n = 1; + const int in_c = 1; + const int in_h = 5; + const int in_w = 5; + std::cout << "in_n: " << in_n << std::endl; + std::cout << "in_c: " << in_c << std::endl; + std::cout << "in_h: " << in_h << std::endl; + std::cout << "in_w: " << in_w << std::endl; + std::cout << std::endl; + // CHECK: hipdnnTensorDescriptor_t in_desc; + cudnnTensorDescriptor_t in_desc; + // CHECK: CUDNN_CALL(hipdnnCreateTensorDescriptor(&in_desc)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc)); + // CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor( + CUDNN_CALL(cudnnSetTensor4dDescriptor( + // CHECK: in_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, + in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, + in_n, in_c, in_h, in_w)); + + float *in_data; + // CHECK: CUDA_CALL(hipMalloc( + CUDA_CALL(cudaMalloc( + &in_data, in_n * in_c * in_h * in_w * sizeof(float))); + + // filter + const int filt_k = 1; + const int filt_c = 1; + const int filt_h = 2; + const int filt_w = 2; + std::cout << "filt_k: " << filt_k << std::endl; + std::cout << "filt_c: " << filt_c << std::endl; + std::cout << "filt_h: " << filt_h << std::endl; + std::cout << "filt_w: " << filt_w << std::endl; + std::cout << std::endl; + + // CHECK: hipdnnFilterDescriptor_t filt_desc; + cudnnFilterDescriptor_t filt_desc; + // CHECK: CUDNN_CALL(hipdnnCreateFilterDescriptor(&filt_desc)); + CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc)); + // CHECK: CUDNN_CALL(hipdnnSetFilter4dDescriptor( + CUDNN_CALL(cudnnSetFilter4dDescriptor( + // CHECK: filt_desc, HIPDNN_DATA_FLOAT, HIPDNN_TENSOR_NCHW, + filt_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, + filt_k, filt_c, filt_h, filt_w)); + + float *filt_data; + // CUDA_CALL(hipMalloc( + CUDA_CALL(cudaMalloc( + &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float))); + + // convolution + const int pad_h = 1; + const int pad_w = 1; + const int str_h = 1; + const int str_w = 1; + const int dil_h = 1; + const int dil_w = 1; + std::cout << "pad_h: " << pad_h << std::endl; + std::cout << "pad_w: " << pad_w << std::endl; + std::cout << "str_h: " << str_h << std::endl; + std::cout << "str_w: " << str_w << std::endl; + std::cout << "dil_h: " << dil_h << std::endl; + std::cout << "dil_w: " << dil_w << std::endl; + std::cout << std::endl; + + // CHECK: hipdnnConvolutionDescriptor_t conv_desc; + cudnnConvolutionDescriptor_t conv_desc; + // CUDNN_CALL(hipdnnCreateConvolutionDescriptor(&conv_desc)); + CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc)); + // CHECK: CUDNN_CALL(hipdnnSetConvolution2dDescriptor( + CUDNN_CALL(cudnnSetConvolution2dDescriptor( + conv_desc, + pad_h, pad_w, str_h, str_w, dil_h, dil_w, + // CHECK: HIPDNN_CONVOLUTION, HIPDNN_DATA_FLOAT)); + CUDNN_CONVOLUTION, CUDNN_DATA_FLOAT)); + + // output + int out_n; + int out_c; + int out_h; + int out_w; + + // CHECK: CUDNN_CALL(hipdnnGetConvolution2dForwardOutputDim( + CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim( + conv_desc, in_desc, filt_desc, + &out_n, &out_c, &out_h, &out_w)); + + std::cout << "out_n: " << out_n << std::endl; + std::cout << "out_c: " << out_c << std::endl; + std::cout << "out_h: " << out_h << std::endl; + std::cout << "out_w: " << out_w << std::endl; + std::cout << std::endl; + // CHECK: hipdnnTensorDescriptor_t out_desc; + cudnnTensorDescriptor_t out_desc; + // CHECK: CUDNN_CALL(hipdnnCreateTensorDescriptor(&out_desc)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); + // CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor( + CUDNN_CALL(cudnnSetTensor4dDescriptor( + // CHECK: out_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, + out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, + out_n, out_c, out_h, out_w)); + + float *out_data; + // CHECK: CUDA_CALL(hipMalloc( + CUDA_CALL(cudaMalloc( + &out_data, out_n * out_c * out_h * out_w * sizeof(float))); + + // algorithm + // CHECK: hipdnnConvolutionFwdAlgo_t algo; + cudnnConvolutionFwdAlgo_t algo; + // CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardAlgorithm( + CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( + cudnn, + in_desc, filt_desc, conv_desc, out_desc, + // CHECK: HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); + CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); + + std::cout << "Convolution algorithm: " << algo << std::endl; + std::cout << std::endl; + + // workspace + size_t ws_size; + // CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardWorkspaceSize( + CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( + cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size)); + + float *ws_data; + // CHECK: CUDA_CALL(hipMalloc(&ws_data, ws_size)); + CUDA_CALL(cudaMalloc(&ws_data, ws_size)); + + std::cout << "Workspace size: " << ws_size << std::endl; + std::cout << std::endl; + + // perform + float alpha = 1.f; + float beta = 0.f; + // CHECK: hipLaunchKernelGGL(dev_iota, dim3(in_w * in_h), dim3(in_n * in_c), 0, 0, in_data); + // CHECK: hipLaunchKernelGGL(dev_const, dim3(filt_w * filt_h), dim3(filt_k * filt_c), 0, 0, filt_data, 1.f); + dev_iota<<>>(in_data); + dev_const<<>>(filt_data, 1.f); + // CHECK: CUDNN_CALL(hipdnnConvolutionForward( + CUDNN_CALL(cudnnConvolutionForward( + cudnn, + &alpha, in_desc, in_data, filt_desc, filt_data, + conv_desc, algo, ws_data, ws_size, + &beta, out_desc, out_data)); + + // results + std::cout << "in_data:" << std::endl; + print(in_data, in_n, in_c, in_h, in_w); + + std::cout << "filt_data:" << std::endl; + print(filt_data, filt_k, filt_c, filt_h, filt_w); + + std::cout << "out_data:" << std::endl; + print(out_data, out_n, out_c, out_h, out_w); + + // finalizing + // CHECK: CUDA_CALL(hipFree(ws_data)); + CUDA_CALL(cudaFree(ws_data)); + // CHECK: CUDA_CALL(hipFree(out_data)); + CUDA_CALL(cudaFree(out_data)); + // CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(out_desc)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc)); + // CHECK: CUDNN_CALL(hipdnnDestroyConvolutionDescriptor(conv_desc)); + CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc)); + // CHECK: CUDA_CALL(hipFree(filt_data)); + CUDA_CALL(cudaFree(filt_data)); + // CHECK: CUDNN_CALL(hipdnnDestroyFilterDescriptor(filt_desc)); + CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc)); + // CHECK: CUDA_CALL(hipFree(in_data)); + CUDA_CALL(cudaFree(in_data)); + // CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(in_desc)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc)); + // CHECK: CUDNN_CALL(hipdnnDestroy(cudnn)); + CUDNN_CALL(cudnnDestroy(cudnn)); + return 0; +} diff --git a/projects/hip/tests/hipify-clang/lit.cfg b/projects/hip/tests/hipify-clang/lit.cfg index 104cee8311..98961fe166 100644 --- a/projects/hip/tests/hipify-clang/lit.cfg +++ b/projects/hip/tests/hipify-clang/lit.cfg @@ -57,6 +57,8 @@ else: run_test_ext = ".sh" clang_args += " -isystem'%s'/samples/common/inc" -config.substitutions.append(("%cuda_args", clang_args % (config.cuda_root, config.cuda_sdk_root))) +clang_args += " -I'%s'/include" + +config.substitutions.append(("%cuda_args", clang_args % (config.cuda_root, config.cuda_sdk_root, config.cuda_dnn_root))) config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"')) config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"')) diff --git a/projects/hip/tests/hipify-clang/lit.site.cfg.in b/projects/hip/tests/hipify-clang/lit.site.cfg.in index e52db7b026..99aa92b49f 100644 --- a/projects/hip/tests/hipify-clang/lit.site.cfg.in +++ b/projects/hip/tests/hipify-clang/lit.site.cfg.in @@ -4,6 +4,7 @@ import os config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@" config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" +config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@" if sys.platform in ['win32']: config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" if not config.cuda_sdk_root or config.cuda_sdk_root == "CUDA_SDK_ROOT_DIR-NOTFOUND":