[HIPIFY] Initial cuDNN support

- based on https://github.com/ROCmSoftwarePlatform/hipDNN.
- lit testing was supplemented with CUDA_DNN_ROOT_DIR for cudnn testing.
- single cuDNN test was added.


[ROCm/hip commit: 2569972dde]
Этот коммит содержится в:
emankov
2018-05-02 22:11:05 +03:00
родитель bda8b52dcc
Коммит 097fa4f200
5 изменённых файлов: 339 добавлений и 1 удалений
+80
Просмотреть файл
@@ -403,6 +403,9 @@ const std::map <llvm::StringRef, hipCounter> CUDA_INCLUDE_MAP{
{"curand_precalc.h", {"hiprand_kernel.h", CONV_INCLUDE, API_RAND}},
{"curand_uniform.h", {"hiprand_kernel.h", CONV_INCLUDE, API_RAND}},
// CUDNN includes
{"cudnn.h", {"hipDNN.h", CONV_INCLUDE_CUDA_MAIN_H, API_RAND}},
// HIP includes
// TODO: uncomment this when hip/cudacommon.h will be renamed to hip/hipcommon.h
// {"cudacommon.h", {"hipcommon.h", CONV_INCLUDE, API_RUNTIME}},
@@ -2880,6 +2883,83 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
{"curand_poisson4", {"hiprand_poisson4", CONV_DEVICE_FUNC, API_RAND}},
{"curand_Philox4x32_10", {"hiprand_Philox4x32_10", CONV_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}},
// unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence
///////////////////////////// cuDNN /////////////////////////////
{"cudnnContext", {"hipdnnContext", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
{"cudnnHandle_t", {"hipdnnHandle_t", CONV_TYPE, API_DNN}},
{"cudnnStatus_t", {"hipdnnStatus_t", CONV_TYPE, API_DNN}},
{"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", CONV_NUMERIC_LITERAL, API_DNN}}, // 0
{"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", CONV_NUMERIC_LITERAL, API_DNN}}, // 1
{"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", CONV_NUMERIC_LITERAL, API_DNN}}, // 2
{"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", CONV_NUMERIC_LITERAL, API_DNN}}, // 3
{"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", CONV_NUMERIC_LITERAL, API_DNN}}, // 4
{"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", CONV_NUMERIC_LITERAL, API_DNN}}, // 5
{"CUDNN_STATUS_ARCH_MISMATCH", {"HIPDNN_STATUS_ARCH_MISMATCH", CONV_NUMERIC_LITERAL, API_DNN}}, // 6
{"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", CONV_NUMERIC_LITERAL, API_DNN}}, // 7
{"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", CONV_NUMERIC_LITERAL, API_DNN}}, // 8
{"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", CONV_NUMERIC_LITERAL, API_DNN}}, // 9
{"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", CONV_NUMERIC_LITERAL, API_DNN}}, // 10
{"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", CONV_NUMERIC_LITERAL, API_DNN}}, // 11
{"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12
{"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13
{"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
{"cudnnTensorDescriptor_t", {"hipdnnTensorDescriptor_t", CONV_TYPE, API_DNN}},
{"cudnnConvolutionDescriptor_t", {"hipdnnConvolutionDescriptor_t", CONV_TYPE, API_DNN}},
{"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", CONV_TYPE, API_DNN}},
{"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", CONV_NUMERIC_LITERAL, API_DNN}}, // 0
{"CUDNN_CROSS_CORRELATION", {"HIPDNN_CROSS_CORRELATION", CONV_NUMERIC_LITERAL, API_DNN}}, // 1
{"cudnnTensorFormat_t", {"hipdnnTensorFormat_t", CONV_TYPE, API_DNN}},
{"CUDNN_TENSOR_NCHW", {"HIPDNN_TENSOR_NCHW", CONV_NUMERIC_LITERAL, API_DNN}}, // 0
{"CUDNN_TENSOR_NHWC", {"HIPDNN_TENSOR_NHWC", CONV_NUMERIC_LITERAL, API_DNN}}, // 1
{"CUDNN_TENSOR_NCHW_VECT_C", {"HIPDNN_TENSOR_NCHW_VECT_C", CONV_NUMERIC_LITERAL, API_DNN}}, // 2
{"cudnnDataType_t", {"hipdnnDataType_t", CONV_TYPE, API_DNN}},
{"CUDNN_DATA_FLOAT", {"HIPDNN_DATA_FLOAT", CONV_NUMERIC_LITERAL, API_DNN}}, // 0
{"CUDNN_DATA_DOUBLE", {"HIPDNN_DATA_DOUBLE", CONV_NUMERIC_LITERAL, API_DNN}}, // 1
{"CUDNN_DATA_HALF", {"HIPDNN_DATA_HALF", CONV_NUMERIC_LITERAL, API_DNN}}, // 2
{"CUDNN_DATA_INT8", {"HIPDNN_DATA_INT8", CONV_NUMERIC_LITERAL, API_DNN}}, // 3
{"CUDNN_DATA_INT32", {"HIPDNN_DATA_INT32", CONV_NUMERIC_LITERAL, API_DNN}}, // 4
{"CUDNN_DATA_INT8x4", {"HIPDNN_DATA_INT8x4", CONV_NUMERIC_LITERAL, API_DNN}}, // 5
{"CUDNN_DATA_UINT8", {"HIPDNN_DATA_UINT8", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6
{"CUDNN_DATA_UINT8x4", {"HIPDNN_DATA_UINT8x4", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7
{"cudnnConvolutionFwdAlgo_t", {"hipdnnConvolutionFwdAlgo_t", CONV_TYPE, API_DNN}},
{"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 0
{"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 1
{"CUDNN_CONVOLUTION_FWD_ALGO_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_GEMM", CONV_NUMERIC_LITERAL, API_DNN}}, // 2
{"CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", {"HIPDNN_CONVOLUTION_FWD_ALGO_DIRECT", CONV_NUMERIC_LITERAL, API_DNN}}, // 3
{"CUDNN_CONVOLUTION_FWD_ALGO_FFT", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT", CONV_NUMERIC_LITERAL, API_DNN}}, // 4
{"CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", CONV_NUMERIC_LITERAL, API_DNN}}, // 5
{"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", CONV_NUMERIC_LITERAL, API_DNN}}, // 6
{"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}}, // 7
{"CUDNN_CONVOLUTION_FWD_ALGO_COUNT", {"HIPDNN_CONVOLUTION_FWD_ALGO_COUNT", CONV_NUMERIC_LITERAL, API_DNN}}, // 8
{"cudnnConvolutionFwdPreference_t", {"hipdnnConvolutionFwdPreference_t", CONV_TYPE, API_DNN}},
{"CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE", CONV_NUMERIC_LITERAL, API_DNN}}, // 0
{"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", CONV_NUMERIC_LITERAL, API_DNN}}, // 1
{"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", CONV_NUMERIC_LITERAL, API_DNN}}, // 2
{"cudnnFilterDescriptor_t", {"hipdnnFilterDescriptor_t", CONV_TYPE, API_DNN}},
{"cudnnGetVersion", {"hipdnnGetVersion", CONV_VERSION, API_DNN}},
{"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", CONV_VERSION, API_DNN, HIP_UNSUPPORTED}},
{"cudnnGetErrorString", {"hipdnnGetErrorString", CONV_ERROR, API_DNN}},
{"cudnnCreate", {"hipdnnCreate", CONV_MATH_FUNC, API_DNN}},
{"cudnnCreateTensorDescriptor", {"hipdnnCreateTensorDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnSetTensor4dDescriptor", {"hipdnnSetTensor4dDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", CONV_MATH_FUNC, API_DNN}},
{"cudnnCreateFilterDescriptor", {"hipdnnCreateFilterDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnSetFilter4dDescriptor", {"hipdnnSetFilter4dDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", CONV_MATH_FUNC, API_DNN}},
{"cudnnConvolutionForward", {"hipdnnConvolutionForward", CONV_MATH_FUNC, API_DNN}},
{"cudnnGetConvolutionForwardWorkspaceSize", {"hipdnnGetConvolutionForwardWorkspaceSize", CONV_MATH_FUNC, API_DNN}},
{"cudnnDestroyTensorDescriptor", {"hipdnnDestroyTensorDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnDestroyConvolutionDescriptor", {"hipdnnDestroyConvolutionDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", CONV_MATH_FUNC, API_DNN}},
{"cudnnDestroy", {"hipdnnDestroy", CONV_MATH_FUNC, API_DNN}},
};
const std::map<llvm::StringRef, hipCounter>& CUDA_RENAMES_MAP() {
+1
Просмотреть файл
@@ -56,6 +56,7 @@ enum ApiTypes {
API_RUNTIME,
API_BLAS,
API_RAND,
API_DNN,
API_LAST
};
constexpr int NUM_API_TYPES = (int) ApiTypes::API_LAST;
+254
Просмотреть файл
@@ -0,0 +1,254 @@
// RUN: %run_test hipify "%s" "%t" %cuda_args
#include <iomanip>
#include <iostream>
#include <cstdlib>
#include <vector>
// CHECK: #include <hip/hip_runtime.h>
#include <cuda.h>
// CHECK: #include "hipDNN.h"
#include "cudnn.h"
// CHECK: hipError_t err = (f); \
// CHECK: if (err != hipSuccess) { \
#define CUDA_CALL(f) { \
cudaError_t err = (f); \
if (err != cudaSuccess) { \
std::cout \
<< " Error occurred: " << err << std::endl; \
std::exit(1); \
} \
}
// CHECK: hipdnnStatus_t err = (f); \
// CHECK: if (err != HIPDNN_STATUS_SUCCESS) { \
#define CUDNN_CALL(f) { \
cudnnStatus_t err = (f); \
if (err != CUDNN_STATUS_SUCCESS) { \
std::cout \
<< " Error occurred: " << err << std::endl; \
std::exit(1); \
} \
}
__global__ void dev_const(float *px, float k) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
px[tid] = k;
}
__global__ void dev_iota(float *px) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
px[tid] = tid;
}
void print(const float *data, int n, int c, int h, int w) {
std::vector<float> buffer(1 << 20);
// CHECK: CUDA_CALL(hipMemcpy(
CUDA_CALL(cudaMemcpy(
buffer.data(), data,
n * c * h * w * sizeof(float),
// CHECK: hipMemcpyDeviceToHost));
cudaMemcpyDeviceToHost));
int a = 0;
for (int i = 0; i < n; ++i) {
for (int j = 0; j < c; ++j) {
std::cout << "n=" << i << ", c=" << j << ":" << std::endl;
for (int k = 0; k < h; ++k) {
for (int l = 0; l < w; ++l) {
std::cout << std::setw(4) << std::right << buffer[a];
++a;
}
std::cout << std::endl;
}
}
}
std::cout << std::endl;
}
int main() {
// CHECK: hipdnnHandle_t cudnn;
cudnnHandle_t cudnn;
// CHECK: CUDNN_CALL(hipdnnCreate(&cudnn));
CUDNN_CALL(cudnnCreate(&cudnn));
// input
const int in_n = 1;
const int in_c = 1;
const int in_h = 5;
const int in_w = 5;
std::cout << "in_n: " << in_n << std::endl;
std::cout << "in_c: " << in_c << std::endl;
std::cout << "in_h: " << in_h << std::endl;
std::cout << "in_w: " << in_w << std::endl;
std::cout << std::endl;
// CHECK: hipdnnTensorDescriptor_t in_desc;
cudnnTensorDescriptor_t in_desc;
// CHECK: CUDNN_CALL(hipdnnCreateTensorDescriptor(&in_desc));
CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc));
// CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor(
CUDNN_CALL(cudnnSetTensor4dDescriptor(
// CHECK: in_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT,
in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
in_n, in_c, in_h, in_w));
float *in_data;
// CHECK: CUDA_CALL(hipMalloc(
CUDA_CALL(cudaMalloc(
&in_data, in_n * in_c * in_h * in_w * sizeof(float)));
// filter
const int filt_k = 1;
const int filt_c = 1;
const int filt_h = 2;
const int filt_w = 2;
std::cout << "filt_k: " << filt_k << std::endl;
std::cout << "filt_c: " << filt_c << std::endl;
std::cout << "filt_h: " << filt_h << std::endl;
std::cout << "filt_w: " << filt_w << std::endl;
std::cout << std::endl;
// CHECK: hipdnnFilterDescriptor_t filt_desc;
cudnnFilterDescriptor_t filt_desc;
// CHECK: CUDNN_CALL(hipdnnCreateFilterDescriptor(&filt_desc));
CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc));
// CHECK: CUDNN_CALL(hipdnnSetFilter4dDescriptor(
CUDNN_CALL(cudnnSetFilter4dDescriptor(
// CHECK: filt_desc, HIPDNN_DATA_FLOAT, HIPDNN_TENSOR_NCHW,
filt_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
filt_k, filt_c, filt_h, filt_w));
float *filt_data;
// CUDA_CALL(hipMalloc(
CUDA_CALL(cudaMalloc(
&filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float)));
// convolution
const int pad_h = 1;
const int pad_w = 1;
const int str_h = 1;
const int str_w = 1;
const int dil_h = 1;
const int dil_w = 1;
std::cout << "pad_h: " << pad_h << std::endl;
std::cout << "pad_w: " << pad_w << std::endl;
std::cout << "str_h: " << str_h << std::endl;
std::cout << "str_w: " << str_w << std::endl;
std::cout << "dil_h: " << dil_h << std::endl;
std::cout << "dil_w: " << dil_w << std::endl;
std::cout << std::endl;
// CHECK: hipdnnConvolutionDescriptor_t conv_desc;
cudnnConvolutionDescriptor_t conv_desc;
// CUDNN_CALL(hipdnnCreateConvolutionDescriptor(&conv_desc));
CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc));
// CHECK: CUDNN_CALL(hipdnnSetConvolution2dDescriptor(
CUDNN_CALL(cudnnSetConvolution2dDescriptor(
conv_desc,
pad_h, pad_w, str_h, str_w, dil_h, dil_w,
// CHECK: HIPDNN_CONVOLUTION, HIPDNN_DATA_FLOAT));
CUDNN_CONVOLUTION, CUDNN_DATA_FLOAT));
// output
int out_n;
int out_c;
int out_h;
int out_w;
// CHECK: CUDNN_CALL(hipdnnGetConvolution2dForwardOutputDim(
CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim(
conv_desc, in_desc, filt_desc,
&out_n, &out_c, &out_h, &out_w));
std::cout << "out_n: " << out_n << std::endl;
std::cout << "out_c: " << out_c << std::endl;
std::cout << "out_h: " << out_h << std::endl;
std::cout << "out_w: " << out_w << std::endl;
std::cout << std::endl;
// CHECK: hipdnnTensorDescriptor_t out_desc;
cudnnTensorDescriptor_t out_desc;
// CHECK: CUDNN_CALL(hipdnnCreateTensorDescriptor(&out_desc));
CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc));
// CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor(
CUDNN_CALL(cudnnSetTensor4dDescriptor(
// CHECK: out_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT,
out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
out_n, out_c, out_h, out_w));
float *out_data;
// CHECK: CUDA_CALL(hipMalloc(
CUDA_CALL(cudaMalloc(
&out_data, out_n * out_c * out_h * out_w * sizeof(float)));
// algorithm
// CHECK: hipdnnConvolutionFwdAlgo_t algo;
cudnnConvolutionFwdAlgo_t algo;
// CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardAlgorithm(
CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm(
cudnn,
in_desc, filt_desc, conv_desc, out_desc,
// CHECK: HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo));
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo));
std::cout << "Convolution algorithm: " << algo << std::endl;
std::cout << std::endl;
// workspace
size_t ws_size;
// CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardWorkspaceSize(
CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(
cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size));
float *ws_data;
// CHECK: CUDA_CALL(hipMalloc(&ws_data, ws_size));
CUDA_CALL(cudaMalloc(&ws_data, ws_size));
std::cout << "Workspace size: " << ws_size << std::endl;
std::cout << std::endl;
// perform
float alpha = 1.f;
float beta = 0.f;
// CHECK: hipLaunchKernelGGL(dev_iota, dim3(in_w * in_h), dim3(in_n * in_c), 0, 0, in_data);
// CHECK: hipLaunchKernelGGL(dev_const, dim3(filt_w * filt_h), dim3(filt_k * filt_c), 0, 0, filt_data, 1.f);
dev_iota<<<in_w * in_h, in_n * in_c>>>(in_data);
dev_const<<<filt_w * filt_h, filt_k * filt_c>>>(filt_data, 1.f);
// CHECK: CUDNN_CALL(hipdnnConvolutionForward(
CUDNN_CALL(cudnnConvolutionForward(
cudnn,
&alpha, in_desc, in_data, filt_desc, filt_data,
conv_desc, algo, ws_data, ws_size,
&beta, out_desc, out_data));
// results
std::cout << "in_data:" << std::endl;
print(in_data, in_n, in_c, in_h, in_w);
std::cout << "filt_data:" << std::endl;
print(filt_data, filt_k, filt_c, filt_h, filt_w);
std::cout << "out_data:" << std::endl;
print(out_data, out_n, out_c, out_h, out_w);
// finalizing
// CHECK: CUDA_CALL(hipFree(ws_data));
CUDA_CALL(cudaFree(ws_data));
// CHECK: CUDA_CALL(hipFree(out_data));
CUDA_CALL(cudaFree(out_data));
// CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(out_desc));
CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc));
// CHECK: CUDNN_CALL(hipdnnDestroyConvolutionDescriptor(conv_desc));
CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc));
// CHECK: CUDA_CALL(hipFree(filt_data));
CUDA_CALL(cudaFree(filt_data));
// CHECK: CUDNN_CALL(hipdnnDestroyFilterDescriptor(filt_desc));
CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc));
// CHECK: CUDA_CALL(hipFree(in_data));
CUDA_CALL(cudaFree(in_data));
// CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(in_desc));
CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc));
// CHECK: CUDNN_CALL(hipdnnDestroy(cudnn));
CUDNN_CALL(cudnnDestroy(cudnn));
return 0;
}
+3 -1
Просмотреть файл
@@ -57,6 +57,8 @@ else:
run_test_ext = ".sh"
clang_args += " -isystem'%s'/samples/common/inc"
config.substitutions.append(("%cuda_args", clang_args % (config.cuda_root, config.cuda_sdk_root)))
clang_args += " -I'%s'/include"
config.substitutions.append(("%cuda_args", clang_args % (config.cuda_root, config.cuda_sdk_root, config.cuda_dnn_root)))
config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"'))
config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"'))
+1
Просмотреть файл
@@ -4,6 +4,7 @@ import os
config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@"
config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@"
config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@"
if sys.platform in ['win32']:
config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@"
if not config.cuda_sdk_root or config.cuda_sdk_root == "CUDA_SDK_ROOT_DIR-NOTFOUND":