From c0224ff0b9b4e94f776a39af9757a4cb68e95b3a Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 28 Nov 2018 20:10:30 +0300 Subject: [PATCH] [HIPIFY][SPARSE] Preconditioners Reference: Incomplete Cholesky Factorization: level 0 + cuSPARSE_05 test + update CUSPARSE_API_supported_by_HIP.md [ROCm/clr commit: e90373c9276d4d9a48e10cd6d19b88d016eb5758] --- .../markdown/CUSPARSE_API_supported_by_HIP.md | 37 +++ .../src/CUDA2HIP_SPARSE_API_functions.cpp | 36 +++ .../hipify-clang/cuSPARSE/cuSPARSE_05.cu | 288 ++++++++++++++++++ 3 files changed, 361 insertions(+) create mode 100644 projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_05.cu diff --git a/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md index be82f35310..c0f62f9f9d 100644 --- a/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/projects/clr/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -312,3 +312,40 @@ |`cusparseDcsrgemm2_bufferSizeExt` | | |`cusparseCcsrgemm2_bufferSizeExt` | | |`cusparseZcsrgemm2_bufferSizeExt` | | + +## **7. cuSPARSE Preconditioners Reference** + +## ***7.1. Incomplete Cholesky Factorization: level 0*** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------------------------| +|`cusparseScsric0` | | +|`cusparseDcsric0` | | +|`cusparseCcsric0` | | +|`cusparseZcsric0` | | +|`cusparseScsric02_bufferSize` | | +|`cusparseDcsric02_bufferSize` | | +|`cusparseCcsric02_bufferSize` | | +|`cusparseZcsric02_bufferSize` | | +|`cusparseScsric02_analysis` | | +|`cusparseDcsric02_analysis` | | +|`cusparseCcsric02_analysis` | | +|`cusparseZcsric02_analysis` | | +|`cusparseScsric02` | | +|`cusparseDcsric02` | | +|`cusparseCcsric02` | | +|`cusparseZcsric02` | | +|`cusparseXcsric02_zeroPivot` | | +|`cusparseSbsric02_bufferSize` | | +|`cusparseDbsric02_bufferSize` | | +|`cusparseCbsric02_bufferSize` | | +|`cusparseZbsric02_bufferSize` | | +|`cusparseSbsric02_analysis` | | +|`cusparseDbsric02_analysis` | | +|`cusparseCbsric02_analysis` | | +|`cusparseZbsric02_analysis` | | +|`cusparseSbsric02` | | +|`cusparseDbsric02` | | +|`cusparseCbsric02` | | +|`cusparseZbsric02` | | +|`cusparseXbsric02_zeroPivot` | | diff --git a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index ed46fc5a54..38bac33854 100644 --- a/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/projects/clr/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -260,4 +260,40 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseCcsrgemm2_bufferSizeExt", {"hipsparseCcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseZcsrgemm2_bufferSizeExt", {"hipsparseZcsrgemm2_bufferSizeExt", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + // 10. cuSPARSE Preconditioners Reference + // 10.1. Incomplete Cholesky Factorization : level 0 + {"cusparseScsric0", {"hipsparseScsric0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsric0", {"hipsparseDcsric0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsric0", {"hipsparseCcsric0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsric0", {"hipsparseZcsric0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsric02_bufferSize", {"hipsparseScsric02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsric02_bufferSize", {"hipsparseDcsric02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsric02_bufferSize", {"hipsparseCcsric02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsric02_bufferSize", {"hipsparseZcsric02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsric02_analysis", {"hipsparseScsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsric02_analysis", {"hipsparseDcsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsric02_analysis", {"hipsparseCcsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsric02_analysis", {"hipsparseZcsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsric02", {"hipsparseScsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsric02", {"hipsparseDcsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsric02", {"hipsparseCcsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsric02", {"hipsparseZcsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXcsric02_zeroPivot", {"hipsparseXcsric02_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsric02_analysis", {"hipsparseSbsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsric02_analysis", {"hipsparseDbsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsric02_analysis", {"hipsparseCbsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsric02_analysis", {"hipsparseZbsric02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsric02", {"hipsparseSbsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsric02", {"hipsparseDbsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsric02", {"hipsparseCbsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsric02", {"hipsparseZbsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXbsric02_zeroPivot", {"hipsparseXbsric02_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, }; diff --git a/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_05.cu b/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_05.cu new file mode 100644 index 0000000000..c6e7374d0b --- /dev/null +++ b/projects/clr/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_05.cu @@ -0,0 +1,288 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args +#include +#include +#include +// CHECK: #include +#include +// CHECK: #include +#include + +void printCsr( + int m, + int n, + int nnz, + // CHECK: const hipsparseMatDescr_t descrA, + const cusparseMatDescr_t descrA, + const float *csrValA, + const int *csrRowPtrA, + const int *csrColIndA, + const char* name) +{ + // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; + const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 0 : 1; + + printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); + for (int row = 0; row < m; row++) { + const int start = csrRowPtrA[row] - base; + const int end = csrRowPtrA[row + 1] - base; + for (int colidx = start; colidx < end; colidx++) { + const int col = csrColIndA[colidx] - base; + const float Areg = csrValA[colidx]; + printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); + } + } +} + +int main(int argc, char*argv[]) +{ + // CHECK: hipsparseHandle_t handle = NULL; + cusparseHandle_t handle = NULL; + // CHECK: hipStream_t stream = NULL; + cudaStream_t stream = NULL; + // CHECK: hipsparseMatDescr_t descrA = NULL; + cusparseMatDescr_t descrA = NULL; + // CHECK: hipsparseMatDescr_t descrC = NULL; + cusparseMatDescr_t descrC = NULL; + // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; + cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; + // CHECK: hipError_t cudaStat1 = hipSuccess; + cudaError_t cudaStat1 = cudaSuccess; + const int m = 4; + const int n = 4; + const int nnzA = 9; + /* + * | 1 0 2 -3 | + * | 0 4 0 0 | + * A = | 5 0 6 7 | + * | 0 8 0 9 | + * + */ + + const int csrRowPtrA[m + 1] = { 1, 4, 5, 8, 10 }; + const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; + const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; + + int* csrRowPtrC = NULL; + int* csrColIndC = NULL; + float* csrValC = NULL; + + int *d_csrRowPtrA = NULL; + int *d_csrColIndA = NULL; + float *d_csrValA = NULL; + + int *d_csrRowPtrC = NULL; + int *d_csrColIndC = NULL; + float *d_csrValC = NULL; + + size_t lworkInBytes = 0; + char *d_work = NULL; + + int nnzC = 0; + + float threshold = 4.1; /* remove Aij <= 4.1 */ +// float threshold = 0; /* remove zeros */ + + printf("example of pruneCsr2csr \n"); + + printf("prune |A(i,j)| <= threshold \n"); + printf("threshold = %E \n", threshold); + + /* step 1: create cusparse handle, bind a stream */ + // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); + cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: status = hipsparseCreate(&handle); + status = cusparseCreate(&handle); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: status = hipsparseSetStream(handle, stream); + status = cusparseSetStream(handle, stream); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + /* step 2: configuration of matrix A and C */ + // CHECK: status = hipsparseCreateMatDescr(&descrA); + status = cusparseCreateMatDescr(&descrA); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + /* A is base-1*/ + // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); + cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); + // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); + // CHECK: status = hipsparseCreateMatDescr(&descrC); + status = cusparseCreateMatDescr(&descrC); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + /* C is base-0 */ + // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); + cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); + // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); + + printCsr(m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, "A"); + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); + cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); + cudaStat1 = cudaMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); + cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); + cudaStat1 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 3: query workspace */ + // TODO: status = hipsparseSpruneCsr2csr_bufferSizeExt( + status = cusparseSpruneCsr2csr_bufferSizeExt( + handle, + m, + n, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + &threshold, + descrC, + d_csrValC, + d_csrRowPtrC, + d_csrColIndC, + &lworkInBytes); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + printf("lworkInBytes (prune) = %lld \n", (long long)lworkInBytes); + // CHECK: if (NULL != d_work) { hipFree(d_work); } + if (NULL != d_work) { cudaFree(d_work); } + // cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); + cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 4: compute csrRowPtrC and nnzC */ + // TODO: status = hipsparseSpruneCsr2csrNnz( + status = cusparseSpruneCsr2csrNnz( + handle, + m, + n, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + &threshold, + descrC, + d_csrRowPtrC, + &nnzC, /* host */ + d_work); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + printf("nnzC = %d\n", nnzC); + if (0 == nnzC) { + printf("C is empty \n"); + return 0; + } + /* step 5: compute csrColIndC and csrValC */ + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); + cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); + cudaStat1 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // TODO: status = hipsparseSpruneCsr2csr( + status = cusparseSpruneCsr2csr( + handle, + m, + n, + nnzA, + descrA, + d_csrValA, + d_csrRowPtrA, + d_csrColIndA, + &threshold, + descrC, + d_csrValC, + d_csrRowPtrC, + d_csrColIndC, + d_work); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 6: output C */ + csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); + csrColIndC = (int*)malloc(sizeof(int)*nnzC); + csrValC = (float*)malloc(sizeof(float)*nnzC); + assert(NULL != csrRowPtrC); + assert(NULL != csrColIndC); + assert(NULL != csrValC); + // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); + cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); + cudaStat1 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: cudaStat1 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); + cudaStat1 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); + /* free resources */ + // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); + if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); + // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); + if (d_csrColIndA) cudaFree(d_csrColIndA); + // CHECK: if (d_csrValA) hipFree(d_csrValA); + if (d_csrValA) cudaFree(d_csrValA); + // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); + if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); + // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); + if (d_csrColIndC) cudaFree(d_csrColIndC); + // CHECK: if (d_csrValC) hipFree(d_csrValC); + if (d_csrValC) cudaFree(d_csrValC); + if (csrRowPtrC) free(csrRowPtrC); + if (csrColIndC) free(csrColIndC); + if (csrValC) free(csrValC); + // CHECK: if (handle) hipsparseDestroy(handle); + if (handle) cusparseDestroy(handle); + // CHECK: if (stream) hipStreamDestroy(stream); + if (stream) cudaStreamDestroy(stream); + // CHECK: if (descrA) hipsparseDestroyMatDescr(descrA); + if (descrA) cusparseDestroyMatDescr(descrA); + // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); + if (descrC) cusparseDestroyMatDescr(descrC); + // CHECK: hipDeviceReset(); + cudaDeviceReset(); + return 0; +}