From ef99d6dee483f69ebc3bd5f18bf151c3c47db12d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 29 Nov 2018 15:59:58 +0300 Subject: [PATCH] [HIPIFY][SPARSE] Preconditioners Reference: Incomplete LU Factorization: level 0 + cuSPARSE_06 test + update CUSPARSE_API_supported_by_HIP.md --- .../markdown/CUSPARSE_API_supported_by_HIP.md | 44 +++ .../src/CUDA2HIP_SPARSE_API_functions.cpp | 52 ++++ .../hipify-clang/cuSPARSE/cuSPARSE_06.cu | 269 ++++++++++++++++++ 3 files changed, 365 insertions(+) create mode 100644 hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_06.cu diff --git a/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md index c0f62f9f9d..d15ab83596 100644 --- a/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -349,3 +349,47 @@ |`cusparseCbsric02` | | |`cusparseZbsric02` | | |`cusparseXbsric02_zeroPivot` | | + +## ***7.2. Incomplete LU Factorization: level 0*** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------------------------| +|`cusparseScsrilu0` | | +|`cusparseDcsrilu0` | | +|`cusparseCcsrilu0` | | +|`cusparseZcsrilu0` | | +|`cusparseCsrilu0Ex` | | +|`cusparseScsrilu02_numericBoost` | | +|`cusparseDcsrilu02_numericBoost` | | +|`cusparseCcsrilu02_numericBoost` | | +|`cusparseZcsrilu02_numericBoost` | | +|`cusparseScsrilu02_bufferSize` |`hipsparseScsrilu02_bufferSize` | +|`cusparseDcsrilu02_bufferSize` |`hipsparseDcsrilu02_bufferSize` | +|`cusparseCcsrilu02_bufferSize` | | +|`cusparseZcsrilu02_bufferSize` | | +|`cusparseScsrilu02_analysis` |`hipsparseScsrilu02_analysis` | +|`cusparseDcsrilu02_analysis` |`hipsparseDcsrilu02_analysis` | +|`cusparseCcsrilu02_analysis` | | +|`cusparseZcsrilu02_analysis` | | +|`cusparseScsrilu02` |`hipsparseScsrilu02` | +|`cusparseDcsrilu02` |`hipsparseDcsrilu02` | +|`cusparseCcsrilu02` | | +|`cusparseZcsrilu02` | | +|`cusparseXbsric02_zeroPivot` |`hipsparseXcsrilu02_zeroPivot` | +|`cusparseSbsrilu02_numericBoost` | | +|`cusparseDbsrilu02_numericBoost` | | +|`cusparseCbsrilu02_numericBoost` | | +|`cusparseZbsrilu02_numericBoost` | | +|`cusparseSbsrilu02_bufferSize` | | +|`cusparseDbsrilu02_bufferSize` | | +|`cusparseCbsrilu02_bufferSize` | | +|`cusparseZbsrilu02_bufferSize` | | +|`cusparseSbsrilu02_analysis` | | +|`cusparseDbsrilu02_analysis` | | +|`cusparseCbsrilu02_analysis` | | +|`cusparseZbsrilu02_analysis` | | +|`cusparseSbsrilu02` | | +|`cusparseDbsrilu02` | | +|`cusparseCbsrilu02` | | +|`cusparseZbsrilu02` | | +|`cusparseXbsrilu02_zeroPivot` | | diff --git a/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp index 38bac33854..a0815608be 100644 --- a/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ b/hipamd/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp @@ -296,4 +296,56 @@ const std::map CUDA_SPARSE_FUNCTION_MAP{ {"cusparseZbsric02", {"hipsparseZbsric02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, {"cusparseXbsric02_zeroPivot", {"hipsparseXbsric02_zeroPivot", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + // 10.2. Incomplete LU Factorization: level 0 + {"cusparseScsrilu0", {"hipsparseScsrilu0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrilu0", {"hipsparseDcsrilu0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrilu0", {"hipsparseCcsrilu0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrilu0", {"hipsparseZcsrilu0", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseCsrilu0Ex", {"hipsparseCsrilu0Ex", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrilu02_numericBoost", {"hipsparseScsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDcsrilu02_numericBoost", {"hipsparseDcsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCcsrilu02_numericBoost", {"hipsparseCcsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrilu02_numericBoost", {"hipsparseZcsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrilu02_bufferSize", {"hipsparseScsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrilu02_bufferSize", {"hipsparseDcsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrilu02_bufferSize", {"hipsparseCcsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrilu02_bufferSize", {"hipsparseZcsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrilu02_analysis", {"hipsparseScsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrilu02_analysis", {"hipsparseDcsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrilu02_analysis", {"hipsparseCcsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrilu02_analysis", {"hipsparseZcsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseScsrilu02", {"hipsparseScsrilu02", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseDcsrilu02", {"hipsparseDcsrilu02", CONV_LIB_FUNC, API_SPARSE}}, + {"cusparseCcsrilu02", {"hipsparseCcsrilu02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZcsrilu02", {"hipsparseZcsrilu02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXbsric02_zeroPivot", {"hipsparseXcsrilu02_zeroPivot", CONV_LIB_FUNC, API_SPARSE}}, + + {"cusparseSbsrilu02_numericBoost", {"hipsparseSbsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrilu02_numericBoost", {"hipsparseDbsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrilu02_numericBoost", {"hipsparseCbsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrilu02_numericBoost", {"hipsparseZbsrilu02_numericBoost", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrilu02_bufferSize", {"hipsparseSbsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrilu02_bufferSize", {"hipsparseDbsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrilu02_bufferSize", {"hipsparseCbsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrilu02_bufferSize", {"hipsparseZbsrilu02_bufferSize", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrilu02_analysis", {"hipsparseSbsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrilu02_analysis", {"hipsparseDbsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrilu02_analysis", {"hipsparseCbsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrilu02_analysis", {"hipsparseZbsrilu02_analysis", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseSbsrilu02", {"hipsparseSbsrilu02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseDbsrilu02", {"hipsparseDbsrilu02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseCbsrilu02", {"hipsparseCbsrilu02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + {"cusparseZbsrilu02", {"hipsparseZbsrilu02", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, + + {"cusparseXbsrilu02_zeroPivot", {"hipsparseXbsrilu02_zeroPivot", CONV_LIB_FUNC, API_SPARSE}}, }; diff --git a/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_06.cu b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_06.cu new file mode 100644 index 0000000000..d38dcd98e4 --- /dev/null +++ b/hipamd/tests/hipify-clang/cuSPARSE/cuSPARSE_06.cu @@ -0,0 +1,269 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args +#include +#include +#include +// CHECK: #include +#include +// CHECK: #include +#include + +void printMatrix(int m, int n, const float*A, int lda, const char* name) +{ + for (int row = 0; row < m; row++) { + for (int col = 0; col < n; col++) { + float Areg = A[row + col * lda]; + printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); + } + } +} + +void printCsr( + int m, + int n, + int nnz, + // CHECK: const hipsparseMatDescr_t descrA, + const cusparseMatDescr_t descrA, + const float *csrValA, + const int *csrRowPtrA, + const int *csrColIndA, + const char* name) +{ + // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; + const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 0 : 1; + + printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); + for (int row = 0; row < m; row++) { + const int start = csrRowPtrA[row] - base; + const int end = csrRowPtrA[row + 1] - base; + for (int colidx = start; colidx < end; colidx++) { + const int col = csrColIndA[colidx] - base; + const float Areg = csrValA[colidx]; + printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); + } + } +} + +int main(int argc, char*argv[]) +{ + // CHECK: hipsparseHandle_t handle = NULL; + cusparseHandle_t handle = NULL; + // CHECK: hipStream_t stream = NULL; + cudaStream_t stream = NULL; + // CHECK: hipsparseMatDescr_t descrC = NULL; + cusparseMatDescr_t descrC = NULL; + pruneInfo_t info = NULL; + // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; + cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; + // CHECK: hipError_t cudaStat1 = hipSuccess; + // CHECK: hipError_t cudaStat2 = hipSuccess; + // CHECK: hipError_t cudaStat3 = hipSuccess; + // CHECK: hipError_t cudaStat4 = hipSuccess; + // CHECK: hipError_t cudaStat5 = hipSuccess; + cudaError_t cudaStat1 = cudaSuccess; + cudaError_t cudaStat2 = cudaSuccess; + cudaError_t cudaStat3 = cudaSuccess; + cudaError_t cudaStat4 = cudaSuccess; + cudaError_t cudaStat5 = cudaSuccess; + const int m = 4; + const int n = 4; + const int lda = m; + /* + * | 1 0 2 -3 | + * | 0 4 0 0 | + * A = | 5 0 6 7 | + * | 0 8 0 9 | + * + */ + const float A[lda*n] = { 1, 0, 5, 0, 0, 4, 0, 8, 2, 0, 6, 0, -3, 0, 7, 9 }; + int* csrRowPtrC = NULL; + int* csrColIndC = NULL; + float* csrValC = NULL; + + float *d_A = NULL; + int *d_csrRowPtrC = NULL; + int *d_csrColIndC = NULL; + float *d_csrValC = NULL; + + size_t lworkInBytes = 0; + char *d_work = NULL; + + int nnzC = 0; + + float percentage = 50; /* 50% of nnz */ + + printf("example of pruneDense2csrByPercentage \n"); + + printf("prune out %.1f percentage of A \n", percentage); + + printMatrix(m, n, A, lda, "A"); + + /* step 1: create cusparse handle, bind a stream */ + // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); + cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: status = hipsparseCreate(&handle); + status = cusparseCreate(&handle); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: status = hipsparseSetStream(handle, stream); + status = cusparseSetStream(handle, stream); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // TODO: status = hipsparseCreatePruneInfo(&info); + status = cusparseCreatePruneInfo(&info); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + /* step 2: configuration of matrix C */ + // CHECK: status = hipsparseCreateMatDescr(&descrC); + status = cusparseCreateMatDescr(&descrC); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); + cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); + // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); + // CHECK: cudaStat1 = hipMalloc((void**)&d_A, sizeof(float)*lda*n); + cudaStat1 = cudaMalloc((void**)&d_A, sizeof(float)*lda*n); + // CHECK: cudaStat2 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); + cudaStat2 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + // CHECK: cudaStat1 = hipMemcpy(d_A, A, sizeof(float)*lda*n, hipMemcpyHostToDevice); + cudaStat1 = cudaMemcpy(d_A, A, sizeof(float)*lda*n, cudaMemcpyHostToDevice); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + /* step 3: query workspace */ + // TODO: status = hipsparseSpruneDense2csrByPercentage_bufferSizeExt( + status = cusparseSpruneDense2csrByPercentage_bufferSizeExt( + handle, + m, + n, + d_A, + lda, + percentage, + descrC, + d_csrValC, + d_csrRowPtrC, + d_csrColIndC, + info, + &lworkInBytes); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + + printf("lworkInBytes = %lld \n", (long long)lworkInBytes); + // CHECK: if (NULL != d_work) { hipFree(d_work); } + if (NULL != d_work) { cudaFree(d_work); } + // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); + cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 4: compute csrRowPtrC and nnzC */ + // TODO: status = hipsparseSpruneDense2csrNnzByPercentage( + status = cusparseSpruneDense2csrNnzByPercentage( + handle, + m, + n, + d_A, + lda, + percentage, + descrC, + d_csrRowPtrC, + &nnzC, /* host */ + info, + d_work); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + printf("nnzC = %d\n", nnzC); + if (0 == nnzC) { + printf("C is empty \n"); + return 0; + } + + /* step 5: compute csrColIndC and csrValC */ + // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); + cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); + // CHECK: cudaStat2 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); + cudaStat2 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + assert(cudaSuccess == cudaStat2); + // TODO: status = hipsparseSpruneDense2csrByPercentage( + status = cusparseSpruneDense2csrByPercentage( + handle, + m, + n, + d_A, + lda, + percentage, + descrC, + d_csrValC, + d_csrRowPtrC, + d_csrColIndC, + info, + d_work); + // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); + assert(CUSPARSE_STATUS_SUCCESS == status); + // CHECK: cudaStat1 = hipDeviceSynchronize(); + cudaStat1 = cudaDeviceSynchronize(); + // CHECK: assert(hipSuccess == cudaStat1); + assert(cudaSuccess == cudaStat1); + + /* step 7: output C */ + csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); + csrColIndC = (int*)malloc(sizeof(int)*nnzC); + csrValC = (float*)malloc(sizeof(float)*nnzC); + assert(NULL != csrRowPtrC); + assert(NULL != csrColIndC); + assert(NULL != csrValC); + // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); + cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); + // CHECK: cudaStat2 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); + cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); + // CHECK: cudaStat3 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); + cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); + // CHECK: assert(hipSuccess == cudaStat1); + // CHECK: assert(hipSuccess == cudaStat2); + // CHECK: assert(hipSuccess == cudaStat3); + assert(cudaSuccess == cudaStat1); + assert(cudaSuccess == cudaStat2); + assert(cudaSuccess == cudaStat3); + + printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); + + /* free resources */ + // CHECK: if (d_A) hipFree(d_A); + if (d_A) cudaFree(d_A); + // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); + if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); + // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); + if (d_csrColIndC) cudaFree(d_csrColIndC); + // CHECK: if (d_csrValC) hipFree(d_csrValC); + if (d_csrValC) cudaFree(d_csrValC); + + if (csrRowPtrC) free(csrRowPtrC); + if (csrColIndC) free(csrColIndC); + if (csrValC) free(csrValC); + // CHECK: if (handle) hipsparseDestroy(handle); + if (handle) cusparseDestroy(handle); + // CHECK: if (stream) hipStreamDestroy(stream); + if (stream) cudaStreamDestroy(stream); + // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); + if (descrC) cusparseDestroyMatDescr(descrC); + // TODO: if (info) hipsparseDestroyPruneInfo(info); + if (info) cusparseDestroyPruneInfo(info); + // CHECK: hipDeviceReset(); + cudaDeviceReset(); + return 0; +} +