From 239104d125dcf7893c274976d67e51a5c35fcccb Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 27 Nov 2018 12:41:50 +0300 Subject: [PATCH] [HIPIFY][SPARSE] Extra functions + cuSPARSE_04 test + update CUSPARSE_API_supported_by_HIP.md [ROCm/clr commit: c75b3c444ba6cf0dd13928fd16f07bbb902bdb04] --- .../markdown/CUSPARSE_API_supported_by_HIP.md | 30 +- .../src/CUDA2HIP_SPARSE_API_functions.cpp | 21 ++ .../hipify-clang/cuSPARSE/cuSPARSE_04.cu | 261 ++++++++++++++++++ 3 files changed, 307 insertions(+), 5 deletions(-) create mode 100644 projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_04.cu diff --git a/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md index e871de927f..be82f35310 100644 --- a/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -1,6 +1,6 @@ # CUSPARSE API supported by HIP -## **1. CUSPARSE Data types** +## **1. cuSPARSE Data types** | **type** | **CUDA** | **HIP** | |-------------:|---------------------------------------------------------------|------------------------------------------------------------| @@ -83,7 +83,7 @@ | struct |`pruneInfo` | | | typedef |`pruneInfo_t` | | -## **2.cuSPARSE Helper Function Reference** +## **2. cuSPARSE Helper Function Reference** | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------------------------| @@ -129,7 +129,7 @@ |`cusparseCreatePruneInfo` | | |`cusparseDestroyPruneInfo` | | -## **3.cuSPARSE Level 1 Function Reference** +## **3. cuSPARSE Level 1 Function Reference** | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------------------------| @@ -158,7 +158,7 @@ |`cusparseCsctr` | | |`cusparseZsctr` | | -## **4.cuSPARSE Level 2 Function Reference** +## **4. cuSPARSE Level 2 Function Reference** | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------------------------| @@ -237,7 +237,7 @@ |`cusparseChybsv_solve` | | |`cusparseZhybsv_solve` | | -## **5.cuSPARSE Level 3 Function Reference** +## **5. cuSPARSE Level 3 Function Reference** | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------------------------| @@ -292,3 +292,23 @@ |`cusparseCgemmi` | | |`cusparseZgemmi` | | +## **6. cuSPARSE Extra Function Reference** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------------------------| +|`cusparseXcsrgeamNnz` | | +|`cusparseScsrgeam` | | +|`cusparseDcsrgeam` | | +|`cusparseCcsrgeam` | | +|`cusparseScsrgeam2_bufferSizeExt` | | +|`cusparseDcsrgeam2_bufferSizeExt` | | +|`cusparseCcsrgeam2_bufferSizeExt` | | +|`cusparseZcsrgeam2_bufferSizeExt` | | +|`cusparseXcsrgemmNnz` | | +|`cusparseScsrgemm` | | +|`cusparseDcsrgemm` | | +|`cusparseCcsrgemm` | | +|`cusparseScsrgemm2_bufferSizeExt` | | +|`cusparseDcsrgemm2_bufferSizeExt` | | +|`cusparseCcsrgemm2_bufferSizeExt` | | +|`cusparseZcsrgemm2_bufferSizeExt` | | diff --git a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index 1ea1365c19..ed46fc5a54 100644 --- a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -239,4 +239,25 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseDgemmi", {"hipsparseDgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCgemmi", {"hipsparseCgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZgemmi", {"hipsparseZgemmi", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + // 9. cuSPARSE Extra Function Reference + {"cusparseXcsrgeamNnz", {"hipsparseXcsrgeamNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrgeam", {"hipsparseScsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrgeam", {"hipsparseDcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrgeam", {"hipsparseCcsrgeam", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrgeam2_bufferSizeExt", {"hipsparseScsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrgeam2_bufferSizeExt", {"hipsparseDcsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrgeam2_bufferSizeExt", {"hipsparseCcsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrgeam2_bufferSizeExt", {"hipsparseZcsrgeam2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcsrgemmNnz", {"hipsparseXcsrgemmNnz", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseScsrgemm", {"hipsparseScsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrgemm", {"hipsparseDcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrgemm", {"hipsparseCcsrgemm", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrgemm2_bufferSizeExt", {"hipsparseScsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrgemm2_bufferSizeExt", {"hipsparseCcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrgemm2_bufferSizeExt", {"hipsparseZcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, }; diff --git a/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_04.cu b/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_04.cu new file mode 100644 index 0000000000..32760f4fa7 --- /dev/null +++ b/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_04.cu @@ -0,0 +1,261 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args +#include +#include +#include +// CHECK: #include +#include +// CHECK: #include +#include + +void printMatrix(int m, int n, const float*A, int lda, const char* name) +{ + for (int row = 0; row < m; row++) { + for (int col = 0; col < n; col++) { + float Areg = A[row + col * lda]; + printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); + } + } +} + +void printCsr( + int m, + int n, + int nnz, + // CHECK: const hipsparseMatDescr_t descrA, + const cusparseMatDescr_t descrA, + const float *csrValA, + const int *csrRowPtrA, + const int *csrColIndA, + const char* name) +{ + // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; + const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 0 : 1; + + printf("matrix %s is %d-by-%d, nnz=%d, base=%d\n", name, m, n, nnz, base); + for (int row = 0; row < m; row++) { + const int start = csrRowPtrA[row] - base; + const int end = csrRowPtrA[row + 1] - base; + for (int colidx = start; colidx < end; colidx++) { + const int col = csrColIndA[colidx] - base; + const float Areg = csrValA[colidx]; + printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); + } + } +} + +int main(int argc, char*argv[]) +{ + // CHECK: hipsparseHandle_t handle = NULL; + cusparseHandle_t handle = NULL; + // CHECK: hipStream_t stream = NULL; + cudaStream_t stream = NULL; + // CHECK: hipsparseMatDescr_t descrC = NULL; + cusparseMatDescr_t descrC = NULL; + // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; + cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; + // CHECK: hipError_t cudaStat1 = hipSuccess; + // CHECK: hipError_t cudaStat2 = hipSuccess; + // CHECK: hipError_t cudaStat3 = hipSuccess; + // CHECK: hipError_t cudaStat4 = hipSuccess; + // CHECK: hipError_t cudaStat5 = hipSuccess; + cudaError_t cudaStat1 = cudaSuccess; + cudaError_t cudaStat2 = cudaSuccess; + cudaError_t cudaStat3 = cudaSuccess; + cudaError_t cudaStat4 = cudaSuccess; + cudaError_t cudaStat5 = cudaSuccess; + const int m = 4; + const int n = 4; + const int lda = m; + /* + * | 1 0 2 -3 | + * | 0 4 0 0 | + * A = | 5 0 6 7 | + * | 0 8 0 9 | + * + */ + const float A[lda*n] = { 1, 0, 5, 0, 0, 4, 0, 8, 2, 0, 6, 0, -3, 0, 7, 9 }; + int* csrRowPtrC = NULL; + int* csrColIndC = NULL; + float* csrValC = NULL; + + float *d_A = NULL; + int *d_csrRowPtrC = NULL; + int *d_csrColIndC = NULL; + float *d_csrValC = NULL; + + size_t lworkInBytes = 0; + char *d_work = NULL; + + int nnzC = 0; + + float threshold = 4.1; /* remove Aij <= 4.1 */ +// float threshold = 0; /* remove zeros */ + + printf("example of pruneDense2csr \n"); + + printf("prune |A(i,j)| <= threshold \n"); + printf("threshold = %E \n", threshold); + + printMatrix(m, n, A, lda, "A"); + + /* step 1: create cusparse handle, bind a stream */ + // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); + cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: status = hipsparseCreate(&handle); + status = cusparseCreate(&handle); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: status = hipsparseSetStream(handle, stream); + status = cusparseSetStream(handle, stream); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + /* step 2: configuration of matrix C */ + // CHECK: status = hipsparseCreateMatDescr(&descrC); + status = cusparseCreateMatDescr(&descrC); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); + cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); + // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); + // CHECK: cudaStat1 = hipMalloc((void**)&d_A, sizeof(float)*lda*n); + cudaStat1 = cudaMalloc((void**)&d_A, sizeof(float)*lda*n); + // CHECK: cudaStat2 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); + cudaStat2 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + + /* step 3: query workspace */ + // CHECK: cudaStat1 = hipMemcpy(d_A, A, sizeof(float)*lda*n, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_A, A, sizeof(float)*lda*n, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // TODO: status = hipsparseSpruneDense2csr_bufferSizeExt( + status = cusparseSpruneDense2csr_bufferSizeExt( + handle, + m, + n, + d_A, + lda, + &threshold, + descrC, + d_csrValC, + d_csrRowPtrC, + d_csrColIndC, + &lworkInBytes); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + printf("lworkInBytes (prune) = %lld \n", (long long)lworkInBytes); + // CHECK: if (NULL != d_work) { hipFree(d_work); } + if (NULL != d_work) { cudaFree(d_work); } + // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); + cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 4: compute csrRowPtrC and nnzC */ + // TODO: status = hipsparseSpruneDense2csrNnz( + status = cusparseSpruneDense2csrNnz( + handle, + m, + n, + d_A, + lda, + &threshold, + descrC, + d_csrRowPtrC, + &nnzC, /* host */ + d_work); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + printf("nnzC = %d\n", nnzC); + if (0 == nnzC) { + printf("C is empty \n"); + return 0; + } + + /* step 5: compute csrColIndC and csrValC */ + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); + cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); + // CHECK: cudaStat2 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); + cudaStat2 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + // TODO: status = hipsparseSpruneDense2csr( + status = cusparseSpruneDense2csr( + handle, + m, + n, + d_A, + lda, + &threshold, + descrC, + d_csrValC, + d_csrRowPtrC, + d_csrColIndC, + d_work); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 6: output C */ + csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); + csrColIndC = (int*)malloc(sizeof(int)*nnzC); + csrValC = (float*)malloc(sizeof(float)*nnzC); + assert(NULL != csrRowPtrC); + assert(NULL != csrColIndC); + assert(NULL != csrValC); + // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); + cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); + // CHECK: cudaStat2 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); + cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); + // CHECK: cudaStat3 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); + cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + // CHECK: assert(hipSuccess == cudaStat3); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + assert(cudaSuccess == cudaStat3); + + printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); + + /* free resources */ + // CHECK: if (d_A) hipFree(d_A); + if (d_A) cudaFree(d_A); + // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); + if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); + // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); + if (d_csrColIndC) cudaFree(d_csrColIndC); + // CHECK: if (d_csrValC) hipFree(d_csrValC); + if (d_csrValC) cudaFree(d_csrValC); + + if (csrRowPtrC) free(csrRowPtrC); + if (csrColIndC) free(csrColIndC); + if (csrValC) free(csrValC); + // CHECK: if (handle) hipsparseDestroy(handle); + if (handle) cusparseDestroy(handle); + // CHECK: if (stream) hipStreamDestroy(stream); + if (stream) cudaStreamDestroy(stream); + // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); + if (descrC) cusparseDestroyMatDescr(descrC); + // CHECK: hipDeviceReset(); + cudaDeviceReset(); + return 0; +}